{ "best_metric": null, "best_model_checkpoint": null, "epoch": 9.999366420274551, "global_step": 11830, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 11.403590202331543, "epoch": 0.0, "learning_rate": 4.2265426880811495e-08, "loss": 10.0071, "step": 1, "task_loss": 4.893718242645264 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 12.050601959228516, "epoch": 0.0, "learning_rate": 8.453085376162299e-08, "loss": 11.6804, "step": 2, "task_loss": 4.735933303833008 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 11.589404106140137, "epoch": 0.0, "learning_rate": 1.267962806424345e-07, "loss": 10.5271, "step": 3, "task_loss": 4.754335880279541 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 11.161865234375, "epoch": 0.0, "learning_rate": 1.6906170752324598e-07, "loss": 10.5263, "step": 4, "task_loss": 4.63836145401001 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 13.924878120422363, "epoch": 0.0, "learning_rate": 2.113271344040575e-07, "loss": 10.962, "step": 5, "task_loss": 4.719663143157959 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 12.134042739868164, "epoch": 0.01, "learning_rate": 2.53592561284869e-07, "loss": 10.352, "step": 6, "task_loss": 4.635924816131592 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 11.69445514678955, "epoch": 0.01, "learning_rate": 2.958579881656805e-07, "loss": 10.8653, "step": 7, "task_loss": 4.6902756690979 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 11.52145004272461, "epoch": 0.01, "learning_rate": 3.3812341504649196e-07, "loss": 10.892, "step": 8, "task_loss": 4.786701679229736 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 12.859132766723633, "epoch": 0.01, "learning_rate": 3.803888419273035e-07, "loss": 9.9271, "step": 9, "task_loss": 4.666134357452393 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 10.892845153808594, "epoch": 0.01, "learning_rate": 4.22654268808115e-07, "loss": 11.1902, "step": 10, "task_loss": 4.827131271362305 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 12.09437370300293, "epoch": 0.01, "learning_rate": 4.649196956889265e-07, "loss": 10.3811, "step": 11, "task_loss": 4.565781593322754 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 12.831682205200195, "epoch": 0.01, "learning_rate": 5.07185122569738e-07, "loss": 11.1487, "step": 12, "task_loss": 4.640714168548584 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 11.277006149291992, "epoch": 0.01, "learning_rate": 5.494505494505495e-07, "loss": 10.7189, "step": 13, "task_loss": 4.608959674835205 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 12.255281448364258, "epoch": 0.01, "learning_rate": 5.91715976331361e-07, "loss": 10.9164, "step": 14, "task_loss": 4.694131374359131 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 12.481856346130371, "epoch": 0.01, "learning_rate": 6.339814032121725e-07, "loss": 10.8891, "step": 15, "task_loss": 4.608437538146973 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 11.204913139343262, "epoch": 0.01, "learning_rate": 6.762468300929839e-07, "loss": 10.847, "step": 16, "task_loss": 4.60659122467041 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 9.41554069519043, "epoch": 0.01, "learning_rate": 7.185122569737954e-07, "loss": 10.6778, "step": 17, "task_loss": 4.496872901916504 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 11.610633850097656, "epoch": 0.02, "learning_rate": 7.60777683854607e-07, "loss": 10.4457, "step": 18, "task_loss": 4.7978034019470215 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 10.160855293273926, "epoch": 0.02, "learning_rate": 8.030431107354184e-07, "loss": 10.2446, "step": 19, "task_loss": 4.690127849578857 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 11.536073684692383, "epoch": 0.02, "learning_rate": 8.4530853761623e-07, "loss": 10.7825, "step": 20, "task_loss": 4.7647786140441895 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 12.957894325256348, "epoch": 0.02, "learning_rate": 8.875739644970415e-07, "loss": 10.986, "step": 21, "task_loss": 4.757566452026367 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 11.082231521606445, "epoch": 0.02, "learning_rate": 9.29839391377853e-07, "loss": 10.8411, "step": 22, "task_loss": 4.698436737060547 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 12.1494140625, "epoch": 0.02, "learning_rate": 9.721048182586645e-07, "loss": 10.9545, "step": 23, "task_loss": 4.758025646209717 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 12.298805236816406, "epoch": 0.02, "learning_rate": 1.014370245139476e-06, "loss": 11.4158, "step": 24, "task_loss": 4.608049392700195 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 9.7218017578125, "epoch": 0.02, "learning_rate": 1.0566356720202875e-06, "loss": 10.7729, "step": 25, "task_loss": 4.495795249938965 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 12.641130447387695, "epoch": 0.02, "learning_rate": 1.098901098901099e-06, "loss": 11.0989, "step": 26, "task_loss": 4.5820488929748535 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 11.282276153564453, "epoch": 0.02, "learning_rate": 1.1411665257819105e-06, "loss": 10.9302, "step": 27, "task_loss": 4.623043537139893 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 12.058012008666992, "epoch": 0.02, "learning_rate": 1.183431952662722e-06, "loss": 11.944, "step": 28, "task_loss": 4.556249618530273 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 11.540451049804688, "epoch": 0.02, "learning_rate": 1.2256973795435333e-06, "loss": 10.6407, "step": 29, "task_loss": 4.739696025848389 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 12.025243759155273, "epoch": 0.03, "learning_rate": 1.267962806424345e-06, "loss": 11.1553, "step": 30, "task_loss": 4.695968151092529 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 10.77232837677002, "epoch": 0.03, "learning_rate": 1.3102282333051563e-06, "loss": 10.7184, "step": 31, "task_loss": 4.6045002937316895 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 11.93844985961914, "epoch": 0.03, "learning_rate": 1.3524936601859678e-06, "loss": 10.9748, "step": 32, "task_loss": 4.716665267944336 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 12.5638427734375, "epoch": 0.03, "learning_rate": 1.3947590870667795e-06, "loss": 10.4731, "step": 33, "task_loss": 4.525957107543945 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 9.45874309539795, "epoch": 0.03, "learning_rate": 1.4370245139475908e-06, "loss": 10.0613, "step": 34, "task_loss": 4.668154716491699 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 10.873498916625977, "epoch": 0.03, "learning_rate": 1.4792899408284024e-06, "loss": 10.7943, "step": 35, "task_loss": 4.735232830047607 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 14.68942642211914, "epoch": 0.03, "learning_rate": 1.521555367709214e-06, "loss": 11.6994, "step": 36, "task_loss": 4.761033058166504 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 10.104684829711914, "epoch": 0.03, "learning_rate": 1.5638207945900256e-06, "loss": 10.2657, "step": 37, "task_loss": 4.6487507820129395 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 10.520551681518555, "epoch": 0.03, "learning_rate": 1.6060862214708369e-06, "loss": 11.6035, "step": 38, "task_loss": 4.661336898803711 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 12.183745384216309, "epoch": 0.03, "learning_rate": 1.6483516483516484e-06, "loss": 11.5555, "step": 39, "task_loss": 4.7185587882995605 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 11.789106369018555, "epoch": 0.03, "learning_rate": 1.69061707523246e-06, "loss": 10.302, "step": 40, "task_loss": 4.583945274353027 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 10.97212028503418, "epoch": 0.03, "learning_rate": 1.7328825021132714e-06, "loss": 10.2057, "step": 41, "task_loss": 4.640735149383545 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 11.867840766906738, "epoch": 0.04, "learning_rate": 1.775147928994083e-06, "loss": 10.988, "step": 42, "task_loss": 4.705913543701172 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 10.908050537109375, "epoch": 0.04, "learning_rate": 1.8174133558748946e-06, "loss": 10.9816, "step": 43, "task_loss": 4.523036479949951 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 11.694483757019043, "epoch": 0.04, "learning_rate": 1.859678782755706e-06, "loss": 10.1506, "step": 44, "task_loss": 4.536032199859619 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 12.424906730651855, "epoch": 0.04, "learning_rate": 1.9019442096365174e-06, "loss": 11.1176, "step": 45, "task_loss": 4.643701553344727 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 11.884790420532227, "epoch": 0.04, "learning_rate": 1.944209636517329e-06, "loss": 11.319, "step": 46, "task_loss": 4.609011650085449 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 9.962676048278809, "epoch": 0.04, "learning_rate": 1.9864750633981404e-06, "loss": 10.0115, "step": 47, "task_loss": 4.765456199645996 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 12.796348571777344, "epoch": 0.04, "learning_rate": 2.028740490278952e-06, "loss": 11.2703, "step": 48, "task_loss": 4.579859733581543 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 11.289663314819336, "epoch": 0.04, "learning_rate": 2.0710059171597635e-06, "loss": 9.8133, "step": 49, "task_loss": 4.583054542541504 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 11.093294143676758, "epoch": 0.04, "learning_rate": 2.113271344040575e-06, "loss": 10.9728, "step": 50, "task_loss": 4.500999927520752 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 10.974398612976074, "epoch": 0.04, "learning_rate": 2.1555367709213865e-06, "loss": 11.1329, "step": 51, "task_loss": 4.763656139373779 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 11.954800605773926, "epoch": 0.04, "learning_rate": 2.197802197802198e-06, "loss": 10.6315, "step": 52, "task_loss": 4.7645440101623535 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 13.320015907287598, "epoch": 0.04, "learning_rate": 2.2400676246830095e-06, "loss": 10.9405, "step": 53, "task_loss": 4.646806240081787 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 11.694287300109863, "epoch": 0.05, "learning_rate": 2.282333051563821e-06, "loss": 10.8696, "step": 54, "task_loss": 4.7642822265625 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 10.622802734375, "epoch": 0.05, "learning_rate": 2.324598478444632e-06, "loss": 10.468, "step": 55, "task_loss": 4.651461601257324 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 11.191650390625, "epoch": 0.05, "learning_rate": 2.366863905325444e-06, "loss": 11.1634, "step": 56, "task_loss": 4.511422634124756 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 11.475743293762207, "epoch": 0.05, "learning_rate": 2.4091293322062555e-06, "loss": 10.7596, "step": 57, "task_loss": 4.732883930206299 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 11.170340538024902, "epoch": 0.05, "learning_rate": 2.4513947590870666e-06, "loss": 10.5071, "step": 58, "task_loss": 4.723390102386475 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 12.606315612792969, "epoch": 0.05, "learning_rate": 2.4936601859678785e-06, "loss": 10.9258, "step": 59, "task_loss": 4.5483927726745605 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 12.338319778442383, "epoch": 0.05, "learning_rate": 2.53592561284869e-06, "loss": 11.211, "step": 60, "task_loss": 4.594723224639893 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 9.50085163116455, "epoch": 0.05, "learning_rate": 2.578191039729501e-06, "loss": 10.1077, "step": 61, "task_loss": 4.477964878082275 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 10.907859802246094, "epoch": 0.05, "learning_rate": 2.6204564666103126e-06, "loss": 10.416, "step": 62, "task_loss": 4.755913257598877 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 10.288369178771973, "epoch": 0.05, "learning_rate": 2.6627218934911246e-06, "loss": 10.6632, "step": 63, "task_loss": 4.57666015625 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 12.502639770507812, "epoch": 0.05, "learning_rate": 2.7049873203719357e-06, "loss": 10.5185, "step": 64, "task_loss": 4.579930782318115 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 13.63343620300293, "epoch": 0.05, "learning_rate": 2.747252747252747e-06, "loss": 11.4454, "step": 65, "task_loss": 4.677177906036377 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 11.630847930908203, "epoch": 0.06, "learning_rate": 2.789518174133559e-06, "loss": 11.0318, "step": 66, "task_loss": 4.614037036895752 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 9.591766357421875, "epoch": 0.06, "learning_rate": 2.83178360101437e-06, "loss": 10.7626, "step": 67, "task_loss": 4.582085609436035 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 12.07567024230957, "epoch": 0.06, "learning_rate": 2.8740490278951817e-06, "loss": 10.6236, "step": 68, "task_loss": 4.833033084869385 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 12.121071815490723, "epoch": 0.06, "learning_rate": 2.9163144547759936e-06, "loss": 11.1782, "step": 69, "task_loss": 4.737482070922852 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 10.720462799072266, "epoch": 0.06, "learning_rate": 2.9585798816568047e-06, "loss": 10.637, "step": 70, "task_loss": 4.638803958892822 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 11.318552017211914, "epoch": 0.06, "learning_rate": 3.0008453085376162e-06, "loss": 10.0053, "step": 71, "task_loss": 4.742284774780273 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 10.845678329467773, "epoch": 0.06, "learning_rate": 3.043110735418428e-06, "loss": 10.2372, "step": 72, "task_loss": 4.62070369720459 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 12.64316177368164, "epoch": 0.06, "learning_rate": 3.0853761622992392e-06, "loss": 11.5501, "step": 73, "task_loss": 4.555132865905762 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 11.010642051696777, "epoch": 0.06, "learning_rate": 3.127641589180051e-06, "loss": 10.2998, "step": 74, "task_loss": 4.748885154724121 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 11.951274871826172, "epoch": 0.06, "learning_rate": 3.1699070160608622e-06, "loss": 10.6523, "step": 75, "task_loss": 4.4831132888793945 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 12.449923515319824, "epoch": 0.06, "learning_rate": 3.2121724429416738e-06, "loss": 10.7146, "step": 76, "task_loss": 4.62700891494751 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 13.358880996704102, "epoch": 0.07, "learning_rate": 3.2544378698224853e-06, "loss": 11.1746, "step": 77, "task_loss": 4.609684467315674 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 11.9044771194458, "epoch": 0.07, "learning_rate": 3.2967032967032968e-06, "loss": 11.0865, "step": 78, "task_loss": 4.658127784729004 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 11.360875129699707, "epoch": 0.07, "learning_rate": 3.3389687235841087e-06, "loss": 10.2149, "step": 79, "task_loss": 4.5647125244140625 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 12.531564712524414, "epoch": 0.07, "learning_rate": 3.38123415046492e-06, "loss": 11.6188, "step": 80, "task_loss": 4.519094944000244 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 8.807174682617188, "epoch": 0.07, "learning_rate": 3.4234995773457313e-06, "loss": 10.3161, "step": 81, "task_loss": 4.749982833862305 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 11.913570404052734, "epoch": 0.07, "learning_rate": 3.465765004226543e-06, "loss": 11.4496, "step": 82, "task_loss": 4.597235202789307 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 11.739767074584961, "epoch": 0.07, "learning_rate": 3.5080304311073543e-06, "loss": 12.0094, "step": 83, "task_loss": 4.527524948120117 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 11.401664733886719, "epoch": 0.07, "learning_rate": 3.550295857988166e-06, "loss": 11.7643, "step": 84, "task_loss": 4.5950927734375 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 11.095552444458008, "epoch": 0.07, "learning_rate": 3.5925612848689777e-06, "loss": 11.1968, "step": 85, "task_loss": 4.579239845275879 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 12.871604919433594, "epoch": 0.07, "learning_rate": 3.6348267117497893e-06, "loss": 10.4665, "step": 86, "task_loss": 4.7465128898620605 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 9.213700294494629, "epoch": 0.07, "learning_rate": 3.6770921386306e-06, "loss": 10.4609, "step": 87, "task_loss": 4.574832916259766 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 10.151535034179688, "epoch": 0.07, "learning_rate": 3.719357565511412e-06, "loss": 10.6299, "step": 88, "task_loss": 4.6177802085876465 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 11.927519798278809, "epoch": 0.08, "learning_rate": 3.7616229923922234e-06, "loss": 11.6768, "step": 89, "task_loss": 4.618904113769531 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 12.138202667236328, "epoch": 0.08, "learning_rate": 3.803888419273035e-06, "loss": 10.4512, "step": 90, "task_loss": 4.729623794555664 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 11.830541610717773, "epoch": 0.08, "learning_rate": 3.846153846153847e-06, "loss": 10.7422, "step": 91, "task_loss": 4.632906913757324 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 12.520606994628906, "epoch": 0.08, "learning_rate": 3.888419273034658e-06, "loss": 10.2491, "step": 92, "task_loss": 4.691428184509277 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 9.747093200683594, "epoch": 0.08, "learning_rate": 3.930684699915469e-06, "loss": 10.4616, "step": 93, "task_loss": 4.500308513641357 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 13.003189086914062, "epoch": 0.08, "learning_rate": 3.972950126796281e-06, "loss": 11.4123, "step": 94, "task_loss": 4.640787124633789 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 9.187047958374023, "epoch": 0.08, "learning_rate": 4.015215553677092e-06, "loss": 10.5413, "step": 95, "task_loss": 4.3826751708984375 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 11.307405471801758, "epoch": 0.08, "learning_rate": 4.057480980557904e-06, "loss": 10.7577, "step": 96, "task_loss": 4.4358978271484375 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 10.256270408630371, "epoch": 0.08, "learning_rate": 4.099746407438716e-06, "loss": 10.316, "step": 97, "task_loss": 4.61489725112915 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 9.962421417236328, "epoch": 0.08, "learning_rate": 4.142011834319527e-06, "loss": 10.7515, "step": 98, "task_loss": 4.597455978393555 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 12.505260467529297, "epoch": 0.08, "learning_rate": 4.184277261200338e-06, "loss": 11.2725, "step": 99, "task_loss": 4.555180549621582 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 10.828603744506836, "epoch": 0.08, "learning_rate": 4.22654268808115e-06, "loss": 10.475, "step": 100, "task_loss": 4.5248332023620605 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 9.866134643554688, "epoch": 0.09, "learning_rate": 4.268808114961961e-06, "loss": 10.3716, "step": 101, "task_loss": 4.572544097900391 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 10.741941452026367, "epoch": 0.09, "learning_rate": 4.311073541842773e-06, "loss": 10.1696, "step": 102, "task_loss": 4.495937824249268 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 11.36722183227539, "epoch": 0.09, "learning_rate": 4.353338968723585e-06, "loss": 10.0955, "step": 103, "task_loss": 4.5355353355407715 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 10.730938911437988, "epoch": 0.09, "learning_rate": 4.395604395604396e-06, "loss": 10.3114, "step": 104, "task_loss": 4.594440937042236 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 9.972132682800293, "epoch": 0.09, "learning_rate": 4.437869822485207e-06, "loss": 10.5973, "step": 105, "task_loss": 4.579448699951172 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 12.593635559082031, "epoch": 0.09, "learning_rate": 4.480135249366019e-06, "loss": 11.1273, "step": 106, "task_loss": 4.613135814666748 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 11.411273956298828, "epoch": 0.09, "learning_rate": 4.52240067624683e-06, "loss": 10.7971, "step": 107, "task_loss": 4.618113994598389 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 11.856842994689941, "epoch": 0.09, "learning_rate": 4.564666103127642e-06, "loss": 10.8939, "step": 108, "task_loss": 4.570024490356445 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 10.126867294311523, "epoch": 0.09, "learning_rate": 4.606931530008454e-06, "loss": 10.2366, "step": 109, "task_loss": 4.447387218475342 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 11.89476490020752, "epoch": 0.09, "learning_rate": 4.649196956889264e-06, "loss": 11.0047, "step": 110, "task_loss": 4.466702938079834 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 11.032048225402832, "epoch": 0.09, "learning_rate": 4.691462383770076e-06, "loss": 9.8645, "step": 111, "task_loss": 4.414778709411621 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 13.524507522583008, "epoch": 0.09, "learning_rate": 4.733727810650888e-06, "loss": 10.1538, "step": 112, "task_loss": 4.66260290145874 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 12.36578369140625, "epoch": 0.1, "learning_rate": 4.775993237531699e-06, "loss": 10.9724, "step": 113, "task_loss": 4.530307292938232 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 12.551671981811523, "epoch": 0.1, "learning_rate": 4.818258664412511e-06, "loss": 11.1853, "step": 114, "task_loss": 4.478623390197754 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 11.023828506469727, "epoch": 0.1, "learning_rate": 4.860524091293322e-06, "loss": 10.0482, "step": 115, "task_loss": 4.4415788650512695 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 13.032001495361328, "epoch": 0.1, "learning_rate": 4.902789518174133e-06, "loss": 10.4372, "step": 116, "task_loss": 4.519935607910156 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 10.225025177001953, "epoch": 0.1, "learning_rate": 4.945054945054945e-06, "loss": 10.6497, "step": 117, "task_loss": 4.48813533782959 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 11.889101028442383, "epoch": 0.1, "learning_rate": 4.987320371935757e-06, "loss": 10.9735, "step": 118, "task_loss": 4.515108108520508 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 12.609251976013184, "epoch": 0.1, "learning_rate": 5.029585798816568e-06, "loss": 11.3165, "step": 119, "task_loss": 4.3884382247924805 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 10.96807861328125, "epoch": 0.1, "learning_rate": 5.07185122569738e-06, "loss": 10.5082, "step": 120, "task_loss": 4.469944000244141 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 12.186902046203613, "epoch": 0.1, "learning_rate": 5.114116652578191e-06, "loss": 10.4668, "step": 121, "task_loss": 4.52056360244751 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 11.834338188171387, "epoch": 0.1, "learning_rate": 5.156382079459002e-06, "loss": 10.3641, "step": 122, "task_loss": 4.4478607177734375 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 11.181888580322266, "epoch": 0.1, "learning_rate": 5.198647506339814e-06, "loss": 10.5963, "step": 123, "task_loss": 4.391740322113037 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 9.219062805175781, "epoch": 0.1, "learning_rate": 5.240912933220625e-06, "loss": 10.2392, "step": 124, "task_loss": 4.6055121421813965 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 10.98564338684082, "epoch": 0.11, "learning_rate": 5.283178360101437e-06, "loss": 10.3353, "step": 125, "task_loss": 4.401786804199219 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 10.830859184265137, "epoch": 0.11, "learning_rate": 5.325443786982249e-06, "loss": 10.5764, "step": 126, "task_loss": 4.343116283416748 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 12.530597686767578, "epoch": 0.11, "learning_rate": 5.36770921386306e-06, "loss": 10.843, "step": 127, "task_loss": 4.556358337402344 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 8.160400390625, "epoch": 0.11, "learning_rate": 5.409974640743871e-06, "loss": 9.9327, "step": 128, "task_loss": 4.309567928314209 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 9.638243675231934, "epoch": 0.11, "learning_rate": 5.452240067624683e-06, "loss": 9.9881, "step": 129, "task_loss": 4.514079570770264 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 12.311914443969727, "epoch": 0.11, "learning_rate": 5.494505494505494e-06, "loss": 10.6223, "step": 130, "task_loss": 4.538150310516357 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 11.595855712890625, "epoch": 0.11, "learning_rate": 5.536770921386306e-06, "loss": 10.9531, "step": 131, "task_loss": 4.534841060638428 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 13.290630340576172, "epoch": 0.11, "learning_rate": 5.579036348267118e-06, "loss": 10.7856, "step": 132, "task_loss": 4.390566825866699 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 12.233758926391602, "epoch": 0.11, "learning_rate": 5.621301775147929e-06, "loss": 10.3096, "step": 133, "task_loss": 4.5218892097473145 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 10.774062156677246, "epoch": 0.11, "learning_rate": 5.66356720202874e-06, "loss": 9.9051, "step": 134, "task_loss": 4.409582138061523 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 11.664326667785645, "epoch": 0.11, "learning_rate": 5.705832628909552e-06, "loss": 11.0271, "step": 135, "task_loss": 4.45792293548584 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 10.76945972442627, "epoch": 0.11, "learning_rate": 5.748098055790363e-06, "loss": 10.5237, "step": 136, "task_loss": 4.52976655960083 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 12.669245719909668, "epoch": 0.12, "learning_rate": 5.790363482671175e-06, "loss": 10.5259, "step": 137, "task_loss": 4.511288642883301 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 10.659748077392578, "epoch": 0.12, "learning_rate": 5.832628909551987e-06, "loss": 10.0496, "step": 138, "task_loss": 4.538107395172119 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 12.318220138549805, "epoch": 0.12, "learning_rate": 5.874894336432798e-06, "loss": 11.0436, "step": 139, "task_loss": 4.42420768737793 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 9.603837966918945, "epoch": 0.12, "learning_rate": 5.917159763313609e-06, "loss": 11.0026, "step": 140, "task_loss": 4.447585582733154 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 9.475722312927246, "epoch": 0.12, "learning_rate": 5.959425190194421e-06, "loss": 9.663, "step": 141, "task_loss": 4.526320457458496 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 10.151780128479004, "epoch": 0.12, "learning_rate": 6.0016906170752324e-06, "loss": 10.0851, "step": 142, "task_loss": 4.430243492126465 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 10.196475982666016, "epoch": 0.12, "learning_rate": 6.043956043956044e-06, "loss": 10.9937, "step": 143, "task_loss": 4.319927215576172 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 11.272387504577637, "epoch": 0.12, "learning_rate": 6.086221470836856e-06, "loss": 10.9631, "step": 144, "task_loss": 4.487193584442139 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 12.366275787353516, "epoch": 0.12, "learning_rate": 6.128486897717667e-06, "loss": 10.8045, "step": 145, "task_loss": 4.4568257331848145 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 9.08237075805664, "epoch": 0.12, "learning_rate": 6.1707523245984785e-06, "loss": 10.6496, "step": 146, "task_loss": 4.381575584411621 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 11.435718536376953, "epoch": 0.12, "learning_rate": 6.21301775147929e-06, "loss": 10.4635, "step": 147, "task_loss": 4.340198040008545 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 10.488754272460938, "epoch": 0.13, "learning_rate": 6.255283178360102e-06, "loss": 10.2172, "step": 148, "task_loss": 4.461639881134033 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 11.088775634765625, "epoch": 0.13, "learning_rate": 6.297548605240913e-06, "loss": 10.1736, "step": 149, "task_loss": 4.251922607421875 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 11.592573165893555, "epoch": 0.13, "learning_rate": 6.3398140321217245e-06, "loss": 10.8346, "step": 150, "task_loss": 4.493696689605713 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 9.482400894165039, "epoch": 0.13, "learning_rate": 6.382079459002536e-06, "loss": 9.5301, "step": 151, "task_loss": 4.568138599395752 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 12.23709487915039, "epoch": 0.13, "learning_rate": 6.4243448858833475e-06, "loss": 10.5568, "step": 152, "task_loss": 4.415182113647461 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 11.710551261901855, "epoch": 0.13, "learning_rate": 6.4666103127641594e-06, "loss": 10.2834, "step": 153, "task_loss": 4.313751697540283 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 10.814706802368164, "epoch": 0.13, "learning_rate": 6.5088757396449705e-06, "loss": 9.9942, "step": 154, "task_loss": 4.347652912139893 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 8.893689155578613, "epoch": 0.13, "learning_rate": 6.551141166525782e-06, "loss": 10.0578, "step": 155, "task_loss": 4.3386077880859375 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 11.638946533203125, "epoch": 0.13, "learning_rate": 6.5934065934065935e-06, "loss": 10.1688, "step": 156, "task_loss": 4.252321243286133 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 10.796585083007812, "epoch": 0.13, "learning_rate": 6.635672020287405e-06, "loss": 9.85, "step": 157, "task_loss": 4.514593601226807 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 12.203529357910156, "epoch": 0.13, "learning_rate": 6.677937447168217e-06, "loss": 10.4681, "step": 158, "task_loss": 4.236311435699463 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 13.074034690856934, "epoch": 0.13, "learning_rate": 6.720202874049028e-06, "loss": 10.5984, "step": 159, "task_loss": 4.2907304763793945 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 11.61819839477539, "epoch": 0.14, "learning_rate": 6.76246830092984e-06, "loss": 10.3344, "step": 160, "task_loss": 4.206151008605957 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 11.346233367919922, "epoch": 0.14, "learning_rate": 6.8047337278106515e-06, "loss": 10.336, "step": 161, "task_loss": 4.290561676025391 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 13.287874221801758, "epoch": 0.14, "learning_rate": 6.846999154691463e-06, "loss": 10.933, "step": 162, "task_loss": 4.171278953552246 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 11.964640617370605, "epoch": 0.14, "learning_rate": 6.8892645815722745e-06, "loss": 10.8671, "step": 163, "task_loss": 4.706011772155762 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 10.779141426086426, "epoch": 0.14, "learning_rate": 6.931530008453086e-06, "loss": 9.953, "step": 164, "task_loss": 4.320837020874023 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 10.711145401000977, "epoch": 0.14, "learning_rate": 6.9737954353338975e-06, "loss": 10.4274, "step": 165, "task_loss": 4.2374982833862305 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 11.074871063232422, "epoch": 0.14, "learning_rate": 7.016060862214709e-06, "loss": 10.7103, "step": 166, "task_loss": 4.251299858093262 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 10.305875778198242, "epoch": 0.14, "learning_rate": 7.05832628909552e-06, "loss": 10.3059, "step": 167, "task_loss": 4.345815658569336 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 10.87346363067627, "epoch": 0.14, "learning_rate": 7.100591715976332e-06, "loss": 9.5595, "step": 168, "task_loss": 4.096438407897949 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 11.741791725158691, "epoch": 0.14, "learning_rate": 7.142857142857143e-06, "loss": 11.1149, "step": 169, "task_loss": 4.1396074295043945 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 12.619377136230469, "epoch": 0.14, "learning_rate": 7.1851225697379555e-06, "loss": 11.1664, "step": 170, "task_loss": 4.154287815093994 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 10.982399940490723, "epoch": 0.14, "learning_rate": 7.227387996618766e-06, "loss": 10.2731, "step": 171, "task_loss": 4.215027332305908 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 11.182270050048828, "epoch": 0.15, "learning_rate": 7.2696534234995785e-06, "loss": 11.0735, "step": 172, "task_loss": 4.028750419616699 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 9.805275917053223, "epoch": 0.15, "learning_rate": 7.31191885038039e-06, "loss": 10.3169, "step": 173, "task_loss": 4.118965148925781 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 7.965993881225586, "epoch": 0.15, "learning_rate": 7.3541842772612e-06, "loss": 9.3504, "step": 174, "task_loss": 4.2412614822387695 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 10.746776580810547, "epoch": 0.15, "learning_rate": 7.396449704142013e-06, "loss": 10.7342, "step": 175, "task_loss": 4.341203689575195 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 10.264242172241211, "epoch": 0.15, "learning_rate": 7.438715131022824e-06, "loss": 9.9874, "step": 176, "task_loss": 4.498307228088379 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 9.640728950500488, "epoch": 0.15, "learning_rate": 7.480980557903636e-06, "loss": 9.1746, "step": 177, "task_loss": 4.313640594482422 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 10.954181671142578, "epoch": 0.15, "learning_rate": 7.523245984784447e-06, "loss": 10.0101, "step": 178, "task_loss": 4.221097469329834 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 11.383469581604004, "epoch": 0.15, "learning_rate": 7.565511411665258e-06, "loss": 10.2038, "step": 179, "task_loss": 4.190242767333984 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 9.425771713256836, "epoch": 0.15, "learning_rate": 7.60777683854607e-06, "loss": 9.2526, "step": 180, "task_loss": 4.167534828186035 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 11.275524139404297, "epoch": 0.15, "learning_rate": 7.65004226542688e-06, "loss": 9.8853, "step": 181, "task_loss": 4.186546325683594 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 9.3565673828125, "epoch": 0.15, "learning_rate": 7.692307692307694e-06, "loss": 10.179, "step": 182, "task_loss": 4.147878170013428 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 10.274319648742676, "epoch": 0.15, "learning_rate": 7.734573119188505e-06, "loss": 9.8514, "step": 183, "task_loss": 4.218626022338867 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 10.793763160705566, "epoch": 0.16, "learning_rate": 7.776838546069316e-06, "loss": 9.6777, "step": 184, "task_loss": 4.267410755157471 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 11.20872688293457, "epoch": 0.16, "learning_rate": 7.819103972950127e-06, "loss": 9.9578, "step": 185, "task_loss": 4.374319076538086 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 10.686092376708984, "epoch": 0.16, "learning_rate": 7.861369399830938e-06, "loss": 9.45, "step": 186, "task_loss": 3.8152966499328613 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 11.13048267364502, "epoch": 0.16, "learning_rate": 7.90363482671175e-06, "loss": 10.4769, "step": 187, "task_loss": 4.035782337188721 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 10.271520614624023, "epoch": 0.16, "learning_rate": 7.945900253592562e-06, "loss": 10.5395, "step": 188, "task_loss": 4.126121997833252 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 10.013580322265625, "epoch": 0.16, "learning_rate": 7.988165680473373e-06, "loss": 9.9373, "step": 189, "task_loss": 4.293776035308838 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 10.108755111694336, "epoch": 0.16, "learning_rate": 8.030431107354184e-06, "loss": 10.2416, "step": 190, "task_loss": 3.8266375064849854 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 12.703033447265625, "epoch": 0.16, "learning_rate": 8.072696534234995e-06, "loss": 11.1518, "step": 191, "task_loss": 4.055019378662109 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 9.374983787536621, "epoch": 0.16, "learning_rate": 8.114961961115808e-06, "loss": 10.4757, "step": 192, "task_loss": 4.20020055770874 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 9.990991592407227, "epoch": 0.16, "learning_rate": 8.157227387996619e-06, "loss": 9.7356, "step": 193, "task_loss": 4.024471759796143 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 9.545437812805176, "epoch": 0.16, "learning_rate": 8.199492814877432e-06, "loss": 9.5969, "step": 194, "task_loss": 4.375448226928711 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 11.055362701416016, "epoch": 0.16, "learning_rate": 8.241758241758243e-06, "loss": 10.1901, "step": 195, "task_loss": 4.030673027038574 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 10.091379165649414, "epoch": 0.17, "learning_rate": 8.284023668639054e-06, "loss": 9.8951, "step": 196, "task_loss": 4.250060558319092 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 10.485389709472656, "epoch": 0.17, "learning_rate": 8.326289095519865e-06, "loss": 9.8495, "step": 197, "task_loss": 4.061662673950195 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 10.825084686279297, "epoch": 0.17, "learning_rate": 8.368554522400676e-06, "loss": 9.6397, "step": 198, "task_loss": 4.07577657699585 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 9.405561447143555, "epoch": 0.17, "learning_rate": 8.410819949281489e-06, "loss": 9.7127, "step": 199, "task_loss": 4.1694231033325195 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 11.09040355682373, "epoch": 0.17, "learning_rate": 8.4530853761623e-06, "loss": 9.645, "step": 200, "task_loss": 4.187140464782715 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 10.289454460144043, "epoch": 0.17, "learning_rate": 8.495350803043111e-06, "loss": 10.6047, "step": 201, "task_loss": 4.169828414916992 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 10.432872772216797, "epoch": 0.17, "learning_rate": 8.537616229923922e-06, "loss": 9.7342, "step": 202, "task_loss": 3.9641635417938232 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 11.453047752380371, "epoch": 0.17, "learning_rate": 8.579881656804733e-06, "loss": 9.718, "step": 203, "task_loss": 3.986605405807495 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 11.925058364868164, "epoch": 0.17, "learning_rate": 8.622147083685546e-06, "loss": 10.1314, "step": 204, "task_loss": 3.8888165950775146 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 10.727497100830078, "epoch": 0.17, "learning_rate": 8.664412510566357e-06, "loss": 9.6088, "step": 205, "task_loss": 3.9736809730529785 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 10.890064239501953, "epoch": 0.17, "learning_rate": 8.70667793744717e-06, "loss": 9.1494, "step": 206, "task_loss": 3.6250922679901123 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 9.582391738891602, "epoch": 0.17, "learning_rate": 8.74894336432798e-06, "loss": 9.3244, "step": 207, "task_loss": 4.294853210449219 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 10.410274505615234, "epoch": 0.18, "learning_rate": 8.791208791208792e-06, "loss": 8.9835, "step": 208, "task_loss": 4.244947910308838 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 9.908506393432617, "epoch": 0.18, "learning_rate": 8.833474218089603e-06, "loss": 9.7579, "step": 209, "task_loss": 4.046106815338135 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 10.048578262329102, "epoch": 0.18, "learning_rate": 8.875739644970414e-06, "loss": 9.7513, "step": 210, "task_loss": 3.86310076713562 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 10.475410461425781, "epoch": 0.18, "learning_rate": 8.918005071851227e-06, "loss": 9.129, "step": 211, "task_loss": 3.783229351043701 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 10.169841766357422, "epoch": 0.18, "learning_rate": 8.960270498732038e-06, "loss": 9.9547, "step": 212, "task_loss": 3.966848611831665 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 10.033048629760742, "epoch": 0.18, "learning_rate": 9.002535925612849e-06, "loss": 9.4696, "step": 213, "task_loss": 3.591090202331543 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 8.172948837280273, "epoch": 0.18, "learning_rate": 9.04480135249366e-06, "loss": 8.8152, "step": 214, "task_loss": 3.9587864875793457 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 10.5323486328125, "epoch": 0.18, "learning_rate": 9.087066779374471e-06, "loss": 9.4885, "step": 215, "task_loss": 3.749351978302002 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 7.818955421447754, "epoch": 0.18, "learning_rate": 9.129332206255284e-06, "loss": 9.8823, "step": 216, "task_loss": 3.7121710777282715 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 8.067737579345703, "epoch": 0.18, "learning_rate": 9.171597633136095e-06, "loss": 8.6905, "step": 217, "task_loss": 4.03646183013916 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 9.002960205078125, "epoch": 0.18, "learning_rate": 9.213863060016908e-06, "loss": 9.009, "step": 218, "task_loss": 3.9315176010131836 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 10.221734046936035, "epoch": 0.19, "learning_rate": 9.256128486897717e-06, "loss": 8.491, "step": 219, "task_loss": 3.6113595962524414 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 9.229199409484863, "epoch": 0.19, "learning_rate": 9.298393913778528e-06, "loss": 8.931, "step": 220, "task_loss": 3.6735680103302 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 10.432989120483398, "epoch": 0.19, "learning_rate": 9.340659340659341e-06, "loss": 9.4439, "step": 221, "task_loss": 3.8682448863983154 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 9.591475486755371, "epoch": 0.19, "learning_rate": 9.382924767540152e-06, "loss": 8.6367, "step": 222, "task_loss": 3.29972505569458 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 8.7122802734375, "epoch": 0.19, "learning_rate": 9.425190194420965e-06, "loss": 8.6281, "step": 223, "task_loss": 3.6467082500457764 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 9.89112663269043, "epoch": 0.19, "learning_rate": 9.467455621301776e-06, "loss": 9.0989, "step": 224, "task_loss": 3.9356582164764404 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 8.698701858520508, "epoch": 0.19, "learning_rate": 9.509721048182587e-06, "loss": 8.742, "step": 225, "task_loss": 3.239922046661377 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 10.923625946044922, "epoch": 0.19, "learning_rate": 9.551986475063398e-06, "loss": 9.9672, "step": 226, "task_loss": 3.6118457317352295 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 10.537036895751953, "epoch": 0.19, "learning_rate": 9.59425190194421e-06, "loss": 9.6852, "step": 227, "task_loss": 3.727559804916382 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 10.187715530395508, "epoch": 0.19, "learning_rate": 9.636517328825022e-06, "loss": 9.2675, "step": 228, "task_loss": 3.956024408340454 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 8.77787971496582, "epoch": 0.19, "learning_rate": 9.678782755705833e-06, "loss": 8.7379, "step": 229, "task_loss": 3.750727891921997 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 8.334738731384277, "epoch": 0.19, "learning_rate": 9.721048182586644e-06, "loss": 9.2216, "step": 230, "task_loss": 4.032806396484375 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 10.017505645751953, "epoch": 0.2, "learning_rate": 9.763313609467455e-06, "loss": 9.6687, "step": 231, "task_loss": 3.8098485469818115 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 9.59679889678955, "epoch": 0.2, "learning_rate": 9.805579036348266e-06, "loss": 9.1146, "step": 232, "task_loss": 3.5098395347595215 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 10.5515718460083, "epoch": 0.2, "learning_rate": 9.84784446322908e-06, "loss": 9.2432, "step": 233, "task_loss": 3.9476349353790283 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 9.95006275177002, "epoch": 0.2, "learning_rate": 9.89010989010989e-06, "loss": 9.017, "step": 234, "task_loss": 3.2772388458251953 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 10.057106018066406, "epoch": 0.2, "learning_rate": 9.932375316990703e-06, "loss": 9.0479, "step": 235, "task_loss": 3.8597865104675293 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 10.000930786132812, "epoch": 0.2, "learning_rate": 9.974640743871514e-06, "loss": 8.7053, "step": 236, "task_loss": 3.56473445892334 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 9.533687591552734, "epoch": 0.2, "learning_rate": 1.0016906170752325e-05, "loss": 9.0735, "step": 237, "task_loss": 3.6946725845336914 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 9.793607711791992, "epoch": 0.2, "learning_rate": 1.0059171597633136e-05, "loss": 8.6857, "step": 238, "task_loss": 3.6462836265563965 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 8.320764541625977, "epoch": 0.2, "learning_rate": 1.0101437024513947e-05, "loss": 9.093, "step": 239, "task_loss": 3.744810104370117 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 9.179412841796875, "epoch": 0.2, "learning_rate": 1.014370245139476e-05, "loss": 8.8603, "step": 240, "task_loss": 3.747973918914795 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 9.077903747558594, "epoch": 0.2, "learning_rate": 1.0185967878275571e-05, "loss": 9.1231, "step": 241, "task_loss": 3.7238407135009766 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 9.257226943969727, "epoch": 0.2, "learning_rate": 1.0228233305156382e-05, "loss": 8.9915, "step": 242, "task_loss": 3.587797164916992 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 9.561230659484863, "epoch": 0.21, "learning_rate": 1.0270498732037193e-05, "loss": 8.7602, "step": 243, "task_loss": 3.36590576171875 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 10.141240119934082, "epoch": 0.21, "learning_rate": 1.0312764158918005e-05, "loss": 8.9407, "step": 244, "task_loss": 3.527395248413086 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 9.287521362304688, "epoch": 0.21, "learning_rate": 1.0355029585798817e-05, "loss": 8.4062, "step": 245, "task_loss": 3.1535964012145996 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 9.89594841003418, "epoch": 0.21, "learning_rate": 1.0397295012679628e-05, "loss": 8.7649, "step": 246, "task_loss": 3.2914223670959473 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 8.468308448791504, "epoch": 0.21, "learning_rate": 1.0439560439560441e-05, "loss": 9.0471, "step": 247, "task_loss": 3.190307140350342 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 10.224224090576172, "epoch": 0.21, "learning_rate": 1.048182586644125e-05, "loss": 8.7487, "step": 248, "task_loss": 3.277031660079956 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 10.15646743774414, "epoch": 0.21, "learning_rate": 1.0524091293322063e-05, "loss": 9.5886, "step": 249, "task_loss": 3.405691385269165 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 9.508550643920898, "epoch": 0.21, "learning_rate": 1.0566356720202874e-05, "loss": 8.9093, "step": 250, "task_loss": 3.5275111198425293 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 8.746103286743164, "epoch": 0.21, "learning_rate": 1.0608622147083686e-05, "loss": 8.2074, "step": 251, "task_loss": 3.4609551429748535 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 9.537322998046875, "epoch": 0.21, "learning_rate": 1.0650887573964498e-05, "loss": 9.0778, "step": 252, "task_loss": 3.7272391319274902 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 8.144298553466797, "epoch": 0.21, "learning_rate": 1.069315300084531e-05, "loss": 7.3586, "step": 253, "task_loss": 3.3878133296966553 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 8.603456497192383, "epoch": 0.21, "learning_rate": 1.073541842772612e-05, "loss": 8.6143, "step": 254, "task_loss": 3.531137704849243 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 8.22976303100586, "epoch": 0.22, "learning_rate": 1.0777683854606932e-05, "loss": 8.3453, "step": 255, "task_loss": 2.8870022296905518 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 7.45205545425415, "epoch": 0.22, "learning_rate": 1.0819949281487743e-05, "loss": 8.4611, "step": 256, "task_loss": 3.4483728408813477 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 9.165212631225586, "epoch": 0.22, "learning_rate": 1.0862214708368555e-05, "loss": 8.0376, "step": 257, "task_loss": 3.178316593170166 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 9.161018371582031, "epoch": 0.22, "learning_rate": 1.0904480135249366e-05, "loss": 8.0121, "step": 258, "task_loss": 3.801513433456421 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 6.667741775512695, "epoch": 0.22, "learning_rate": 1.094674556213018e-05, "loss": 7.6147, "step": 259, "task_loss": 3.5329809188842773 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 9.509736061096191, "epoch": 0.22, "learning_rate": 1.0989010989010989e-05, "loss": 8.6583, "step": 260, "task_loss": 3.0156421661376953 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 9.281269073486328, "epoch": 0.22, "learning_rate": 1.1031276415891801e-05, "loss": 8.7036, "step": 261, "task_loss": 3.0013539791107178 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 9.716171264648438, "epoch": 0.22, "learning_rate": 1.1073541842772613e-05, "loss": 8.8486, "step": 262, "task_loss": 3.244210720062256 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 8.68092155456543, "epoch": 0.22, "learning_rate": 1.1115807269653424e-05, "loss": 8.0044, "step": 263, "task_loss": 3.2940123081207275 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 10.401957511901855, "epoch": 0.22, "learning_rate": 1.1158072696534236e-05, "loss": 8.7166, "step": 264, "task_loss": 3.208735704421997 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 8.635251998901367, "epoch": 0.22, "learning_rate": 1.1200338123415047e-05, "loss": 8.0107, "step": 265, "task_loss": 3.4525182247161865 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 8.818111419677734, "epoch": 0.22, "learning_rate": 1.1242603550295859e-05, "loss": 7.7175, "step": 266, "task_loss": 2.727339744567871 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 8.090768814086914, "epoch": 0.23, "learning_rate": 1.128486897717667e-05, "loss": 7.9907, "step": 267, "task_loss": 3.7688605785369873 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 8.088828086853027, "epoch": 0.23, "learning_rate": 1.132713440405748e-05, "loss": 8.2883, "step": 268, "task_loss": 3.1267666816711426 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 7.61367130279541, "epoch": 0.23, "learning_rate": 1.1369399830938294e-05, "loss": 7.5504, "step": 269, "task_loss": 3.1898815631866455 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 8.58709716796875, "epoch": 0.23, "learning_rate": 1.1411665257819105e-05, "loss": 8.4225, "step": 270, "task_loss": 2.7470362186431885 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 8.159021377563477, "epoch": 0.23, "learning_rate": 1.1453930684699916e-05, "loss": 7.9497, "step": 271, "task_loss": 3.456969976425171 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 9.73142147064209, "epoch": 0.23, "learning_rate": 1.1496196111580727e-05, "loss": 7.9502, "step": 272, "task_loss": 3.209144353866577 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 9.608966827392578, "epoch": 0.23, "learning_rate": 1.153846153846154e-05, "loss": 8.5023, "step": 273, "task_loss": 3.067208766937256 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 9.067193984985352, "epoch": 0.23, "learning_rate": 1.158072696534235e-05, "loss": 8.4317, "step": 274, "task_loss": 2.8097782135009766 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 9.70694637298584, "epoch": 0.23, "learning_rate": 1.1622992392223162e-05, "loss": 8.8497, "step": 275, "task_loss": 3.1005516052246094 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 8.506094932556152, "epoch": 0.23, "learning_rate": 1.1665257819103974e-05, "loss": 7.9693, "step": 276, "task_loss": 3.0219662189483643 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 10.111152648925781, "epoch": 0.23, "learning_rate": 1.1707523245984786e-05, "loss": 8.1954, "step": 277, "task_loss": 3.1629347801208496 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 7.7543840408325195, "epoch": 0.23, "learning_rate": 1.1749788672865597e-05, "loss": 7.7928, "step": 278, "task_loss": 3.210057020187378 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 9.188498497009277, "epoch": 0.24, "learning_rate": 1.1792054099746408e-05, "loss": 8.309, "step": 279, "task_loss": 3.192016839981079 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 8.26401138305664, "epoch": 0.24, "learning_rate": 1.1834319526627219e-05, "loss": 8.2512, "step": 280, "task_loss": 3.2035396099090576 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 8.50117301940918, "epoch": 0.24, "learning_rate": 1.1876584953508032e-05, "loss": 7.7436, "step": 281, "task_loss": 3.1840407848358154 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 8.73055362701416, "epoch": 0.24, "learning_rate": 1.1918850380388843e-05, "loss": 8.3993, "step": 282, "task_loss": 3.3466413021087646 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 8.744690895080566, "epoch": 0.24, "learning_rate": 1.1961115807269654e-05, "loss": 8.0616, "step": 283, "task_loss": 2.9450626373291016 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 8.223235130310059, "epoch": 0.24, "learning_rate": 1.2003381234150465e-05, "loss": 7.4737, "step": 284, "task_loss": 2.517256736755371 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 7.263904571533203, "epoch": 0.24, "learning_rate": 1.2045646661031278e-05, "loss": 7.2453, "step": 285, "task_loss": 3.256157875061035 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 7.5834479331970215, "epoch": 0.24, "learning_rate": 1.2087912087912089e-05, "loss": 7.5838, "step": 286, "task_loss": 2.9883928298950195 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 9.039785385131836, "epoch": 0.24, "learning_rate": 1.21301775147929e-05, "loss": 7.7269, "step": 287, "task_loss": 2.9204177856445312 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 7.620002746582031, "epoch": 0.24, "learning_rate": 1.2172442941673713e-05, "loss": 7.6982, "step": 288, "task_loss": 3.3289504051208496 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 6.330410480499268, "epoch": 0.24, "learning_rate": 1.2214708368554522e-05, "loss": 6.6127, "step": 289, "task_loss": 3.1552486419677734 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 7.199276924133301, "epoch": 0.24, "learning_rate": 1.2256973795435335e-05, "loss": 6.9329, "step": 290, "task_loss": 2.586980104446411 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 7.269084453582764, "epoch": 0.25, "learning_rate": 1.2299239222316146e-05, "loss": 7.3932, "step": 291, "task_loss": 3.4052515029907227 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 7.650925636291504, "epoch": 0.25, "learning_rate": 1.2341504649196957e-05, "loss": 7.279, "step": 292, "task_loss": 2.772911310195923 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 7.6374993324279785, "epoch": 0.25, "learning_rate": 1.238377007607777e-05, "loss": 6.8702, "step": 293, "task_loss": 2.59550142288208 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 8.041665077209473, "epoch": 0.25, "learning_rate": 1.242603550295858e-05, "loss": 7.3327, "step": 294, "task_loss": 2.7869927883148193 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 9.058813095092773, "epoch": 0.25, "learning_rate": 1.2468300929839392e-05, "loss": 7.486, "step": 295, "task_loss": 2.856990337371826 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 8.69514274597168, "epoch": 0.25, "learning_rate": 1.2510566356720205e-05, "loss": 7.4231, "step": 296, "task_loss": 3.403477907180786 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 7.02663516998291, "epoch": 0.25, "learning_rate": 1.2552831783601016e-05, "loss": 7.0087, "step": 297, "task_loss": 2.9371819496154785 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 7.265678882598877, "epoch": 0.25, "learning_rate": 1.2595097210481827e-05, "loss": 7.0669, "step": 298, "task_loss": 2.465744733810425 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 7.6635541915893555, "epoch": 0.25, "learning_rate": 1.2637362637362638e-05, "loss": 7.1107, "step": 299, "task_loss": 2.632458209991455 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 7.309359073638916, "epoch": 0.25, "learning_rate": 1.2679628064243449e-05, "loss": 7.4685, "step": 300, "task_loss": 3.1758174896240234 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 8.924592971801758, "epoch": 0.25, "learning_rate": 1.2721893491124262e-05, "loss": 7.228, "step": 301, "task_loss": 2.345716714859009 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 7.444634437561035, "epoch": 0.26, "learning_rate": 1.2764158918005073e-05, "loss": 7.2159, "step": 302, "task_loss": 2.6961519718170166 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 7.346635818481445, "epoch": 0.26, "learning_rate": 1.2806424344885884e-05, "loss": 6.77, "step": 303, "task_loss": 2.6522417068481445 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 7.010703086853027, "epoch": 0.26, "learning_rate": 1.2848689771766695e-05, "loss": 7.0704, "step": 304, "task_loss": 3.018387794494629 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 7.830002784729004, "epoch": 0.26, "learning_rate": 1.2890955198647506e-05, "loss": 7.4245, "step": 305, "task_loss": 3.183865547180176 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 7.979141712188721, "epoch": 0.26, "learning_rate": 1.2933220625528319e-05, "loss": 7.3239, "step": 306, "task_loss": 2.4571142196655273 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 6.972380638122559, "epoch": 0.26, "learning_rate": 1.297548605240913e-05, "loss": 7.2231, "step": 307, "task_loss": 2.3960916996002197 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 6.064640522003174, "epoch": 0.26, "learning_rate": 1.3017751479289941e-05, "loss": 6.6009, "step": 308, "task_loss": 2.6419718265533447 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 6.610091686248779, "epoch": 0.26, "learning_rate": 1.3060016906170752e-05, "loss": 7.2452, "step": 309, "task_loss": 2.8806211948394775 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 7.081999778747559, "epoch": 0.26, "learning_rate": 1.3102282333051563e-05, "loss": 6.1722, "step": 310, "task_loss": 2.75994873046875 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 8.020060539245605, "epoch": 0.26, "learning_rate": 1.3144547759932378e-05, "loss": 6.9228, "step": 311, "task_loss": 2.7260892391204834 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 8.168285369873047, "epoch": 0.26, "learning_rate": 1.3186813186813187e-05, "loss": 7.0175, "step": 312, "task_loss": 2.3912341594696045 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 6.537705421447754, "epoch": 0.26, "learning_rate": 1.3229078613693998e-05, "loss": 7.0273, "step": 313, "task_loss": 2.5945322513580322 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 8.620354652404785, "epoch": 0.27, "learning_rate": 1.327134404057481e-05, "loss": 7.0173, "step": 314, "task_loss": 3.1213581562042236 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 7.438002109527588, "epoch": 0.27, "learning_rate": 1.3313609467455624e-05, "loss": 6.332, "step": 315, "task_loss": 2.336137533187866 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 7.507883071899414, "epoch": 0.27, "learning_rate": 1.3355874894336435e-05, "loss": 6.9275, "step": 316, "task_loss": 2.363334894180298 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 5.7529296875, "epoch": 0.27, "learning_rate": 1.3398140321217246e-05, "loss": 6.9245, "step": 317, "task_loss": 2.8360331058502197 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 6.88886833190918, "epoch": 0.27, "learning_rate": 1.3440405748098055e-05, "loss": 6.1753, "step": 318, "task_loss": 2.483402729034424 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 8.032519340515137, "epoch": 0.27, "learning_rate": 1.3482671174978866e-05, "loss": 7.0717, "step": 319, "task_loss": 3.0559518337249756 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 7.813842296600342, "epoch": 0.27, "learning_rate": 1.352493660185968e-05, "loss": 7.3942, "step": 320, "task_loss": 2.770368814468384 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 7.590816497802734, "epoch": 0.27, "learning_rate": 1.3567202028740492e-05, "loss": 6.1288, "step": 321, "task_loss": 2.619555711746216 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 5.410475730895996, "epoch": 0.27, "learning_rate": 1.3609467455621303e-05, "loss": 6.5503, "step": 322, "task_loss": 2.099933385848999 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 6.537872314453125, "epoch": 0.27, "learning_rate": 1.3651732882502114e-05, "loss": 7.053, "step": 323, "task_loss": 3.1748578548431396 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 7.0810041427612305, "epoch": 0.27, "learning_rate": 1.3693998309382925e-05, "loss": 6.0314, "step": 324, "task_loss": 2.4908924102783203 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 5.902927875518799, "epoch": 0.27, "learning_rate": 1.3736263736263738e-05, "loss": 6.7185, "step": 325, "task_loss": 1.9620531797409058 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 5.460475444793701, "epoch": 0.28, "learning_rate": 1.3778529163144549e-05, "loss": 5.9992, "step": 326, "task_loss": 2.396761894226074 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 6.233303070068359, "epoch": 0.28, "learning_rate": 1.382079459002536e-05, "loss": 6.755, "step": 327, "task_loss": 2.5932676792144775 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 7.193846702575684, "epoch": 0.28, "learning_rate": 1.3863060016906171e-05, "loss": 6.4138, "step": 328, "task_loss": 2.349155902862549 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 7.386889457702637, "epoch": 0.28, "learning_rate": 1.3905325443786982e-05, "loss": 6.4544, "step": 329, "task_loss": 2.641763687133789 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 5.899024963378906, "epoch": 0.28, "learning_rate": 1.3947590870667795e-05, "loss": 6.3243, "step": 330, "task_loss": 2.0411782264709473 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 6.233712673187256, "epoch": 0.28, "learning_rate": 1.3989856297548606e-05, "loss": 6.0765, "step": 331, "task_loss": 2.0285537242889404 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 5.895257949829102, "epoch": 0.28, "learning_rate": 1.4032121724429417e-05, "loss": 6.289, "step": 332, "task_loss": 2.523454189300537 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 6.672489166259766, "epoch": 0.28, "learning_rate": 1.4074387151310228e-05, "loss": 6.4813, "step": 333, "task_loss": 1.8559277057647705 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 5.948751449584961, "epoch": 0.28, "learning_rate": 1.411665257819104e-05, "loss": 6.1338, "step": 334, "task_loss": 2.6344242095947266 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 7.425365447998047, "epoch": 0.28, "learning_rate": 1.4158918005071852e-05, "loss": 7.0047, "step": 335, "task_loss": 2.5383880138397217 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 6.567205429077148, "epoch": 0.28, "learning_rate": 1.4201183431952663e-05, "loss": 6.0035, "step": 336, "task_loss": 2.2867391109466553 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 6.570411205291748, "epoch": 0.28, "learning_rate": 1.4243448858833474e-05, "loss": 5.5269, "step": 337, "task_loss": 2.302530527114868 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 5.863937854766846, "epoch": 0.29, "learning_rate": 1.4285714285714285e-05, "loss": 6.2082, "step": 338, "task_loss": 2.3930721282958984 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 6.419766426086426, "epoch": 0.29, "learning_rate": 1.4327979712595097e-05, "loss": 5.9672, "step": 339, "task_loss": 2.5264058113098145 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 5.894221782684326, "epoch": 0.29, "learning_rate": 1.4370245139475911e-05, "loss": 5.7745, "step": 340, "task_loss": 2.48919939994812 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 6.262081623077393, "epoch": 0.29, "learning_rate": 1.441251056635672e-05, "loss": 5.8928, "step": 341, "task_loss": 2.296867847442627 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 8.025496482849121, "epoch": 0.29, "learning_rate": 1.4454775993237531e-05, "loss": 6.122, "step": 342, "task_loss": 2.5754010677337646 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 6.355745315551758, "epoch": 0.29, "learning_rate": 1.4497041420118343e-05, "loss": 6.1599, "step": 343, "task_loss": 2.6352131366729736 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 6.904348373413086, "epoch": 0.29, "learning_rate": 1.4539306846999157e-05, "loss": 6.0372, "step": 344, "task_loss": 2.200890302658081 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 6.383485317230225, "epoch": 0.29, "learning_rate": 1.4581572273879968e-05, "loss": 5.7256, "step": 345, "task_loss": 2.2610771656036377 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 5.447573661804199, "epoch": 0.29, "learning_rate": 1.462383770076078e-05, "loss": 5.5437, "step": 346, "task_loss": 2.330890655517578 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 5.429883003234863, "epoch": 0.29, "learning_rate": 1.466610312764159e-05, "loss": 5.7613, "step": 347, "task_loss": 2.0712246894836426 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 6.443944931030273, "epoch": 0.29, "learning_rate": 1.47083685545224e-05, "loss": 5.7443, "step": 348, "task_loss": 2.4881842136383057 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 6.506386756896973, "epoch": 0.29, "learning_rate": 1.4750633981403214e-05, "loss": 5.4517, "step": 349, "task_loss": 1.6730608940124512 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 5.357576847076416, "epoch": 0.3, "learning_rate": 1.4792899408284025e-05, "loss": 4.9021, "step": 350, "task_loss": 2.2803804874420166 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 5.611043453216553, "epoch": 0.3, "learning_rate": 1.4835164835164836e-05, "loss": 5.723, "step": 351, "task_loss": 1.7297115325927734 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 6.749140739440918, "epoch": 0.3, "learning_rate": 1.4877430262045647e-05, "loss": 6.0554, "step": 352, "task_loss": 2.546757936477661 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 6.688474655151367, "epoch": 0.3, "learning_rate": 1.4919695688926458e-05, "loss": 5.3282, "step": 353, "task_loss": 1.8218973875045776 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 6.267772674560547, "epoch": 0.3, "learning_rate": 1.4961961115807271e-05, "loss": 6.354, "step": 354, "task_loss": 1.9233524799346924 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 7.722354412078857, "epoch": 0.3, "learning_rate": 1.5004226542688082e-05, "loss": 5.5794, "step": 355, "task_loss": 2.558985710144043 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 6.218945503234863, "epoch": 0.3, "learning_rate": 1.5046491969568893e-05, "loss": 4.9183, "step": 356, "task_loss": 2.263394832611084 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 5.910632133483887, "epoch": 0.3, "learning_rate": 1.5088757396449705e-05, "loss": 5.2828, "step": 357, "task_loss": 2.0332255363464355 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 4.502803325653076, "epoch": 0.3, "learning_rate": 1.5131022823330516e-05, "loss": 5.3063, "step": 358, "task_loss": 2.33392071723938 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 7.088931083679199, "epoch": 0.3, "learning_rate": 1.5173288250211328e-05, "loss": 6.3923, "step": 359, "task_loss": 1.8620336055755615 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 5.987544059753418, "epoch": 0.3, "learning_rate": 1.521555367709214e-05, "loss": 5.3951, "step": 360, "task_loss": 2.5419938564300537 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 4.578038215637207, "epoch": 0.3, "learning_rate": 1.525781910397295e-05, "loss": 4.9977, "step": 361, "task_loss": 1.972806692123413 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 5.173656463623047, "epoch": 0.31, "learning_rate": 1.530008453085376e-05, "loss": 5.0946, "step": 362, "task_loss": 1.5557239055633545 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 7.562951564788818, "epoch": 0.31, "learning_rate": 1.534234995773457e-05, "loss": 5.8932, "step": 363, "task_loss": 2.416274309158325 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 4.875923156738281, "epoch": 0.31, "learning_rate": 1.5384615384615387e-05, "loss": 4.9647, "step": 364, "task_loss": 2.394611358642578 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 5.773307800292969, "epoch": 0.31, "learning_rate": 1.5426880811496197e-05, "loss": 5.5396, "step": 365, "task_loss": 1.7742772102355957 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 5.911276340484619, "epoch": 0.31, "learning_rate": 1.546914623837701e-05, "loss": 5.5433, "step": 366, "task_loss": 1.9132928848266602 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 4.448709487915039, "epoch": 0.31, "learning_rate": 1.551141166525782e-05, "loss": 5.0675, "step": 367, "task_loss": 2.2840864658355713 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 5.060549736022949, "epoch": 0.31, "learning_rate": 1.555367709213863e-05, "loss": 4.6987, "step": 368, "task_loss": 2.443235397338867 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 5.242588043212891, "epoch": 0.31, "learning_rate": 1.5595942519019444e-05, "loss": 4.6777, "step": 369, "task_loss": 1.955786108970642 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 6.5537238121032715, "epoch": 0.31, "learning_rate": 1.5638207945900254e-05, "loss": 5.8653, "step": 370, "task_loss": 2.4549827575683594 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 5.0424604415893555, "epoch": 0.31, "learning_rate": 1.5680473372781066e-05, "loss": 5.1823, "step": 371, "task_loss": 1.7950185537338257 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 4.466312885284424, "epoch": 0.31, "learning_rate": 1.5722738799661876e-05, "loss": 5.4229, "step": 372, "task_loss": 1.4839543104171753 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 5.001593112945557, "epoch": 0.32, "learning_rate": 1.576500422654269e-05, "loss": 5.1429, "step": 373, "task_loss": 1.7042378187179565 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 7.290884017944336, "epoch": 0.32, "learning_rate": 1.58072696534235e-05, "loss": 5.336, "step": 374, "task_loss": 1.8394978046417236 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 5.908597946166992, "epoch": 0.32, "learning_rate": 1.584953508030431e-05, "loss": 5.2446, "step": 375, "task_loss": 1.6273515224456787 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 3.360844135284424, "epoch": 0.32, "learning_rate": 1.5891800507185124e-05, "loss": 4.5445, "step": 376, "task_loss": 1.5615137815475464 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 6.3032426834106445, "epoch": 0.32, "learning_rate": 1.5934065934065933e-05, "loss": 5.3883, "step": 377, "task_loss": 1.7372429370880127 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 5.513960838317871, "epoch": 0.32, "learning_rate": 1.5976331360946746e-05, "loss": 4.8746, "step": 378, "task_loss": 2.129615306854248 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 5.728913307189941, "epoch": 0.32, "learning_rate": 1.601859678782756e-05, "loss": 5.2556, "step": 379, "task_loss": 1.3387094736099243 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 5.840574741363525, "epoch": 0.32, "learning_rate": 1.6060862214708368e-05, "loss": 4.8045, "step": 380, "task_loss": 1.8836053609848022 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 5.458423614501953, "epoch": 0.32, "learning_rate": 1.610312764158918e-05, "loss": 4.961, "step": 381, "task_loss": 1.8382712602615356 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 5.191441059112549, "epoch": 0.32, "learning_rate": 1.614539306846999e-05, "loss": 4.7982, "step": 382, "task_loss": 1.897676706314087 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 4.75653076171875, "epoch": 0.32, "learning_rate": 1.6187658495350806e-05, "loss": 5.0632, "step": 383, "task_loss": 2.3354899883270264 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 4.828614234924316, "epoch": 0.32, "learning_rate": 1.6229923922231616e-05, "loss": 4.7467, "step": 384, "task_loss": 1.2330222129821777 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 5.0315423011779785, "epoch": 0.33, "learning_rate": 1.6272189349112425e-05, "loss": 5.0099, "step": 385, "task_loss": 1.9350425004959106 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 5.52721643447876, "epoch": 0.33, "learning_rate": 1.6314454775993238e-05, "loss": 4.8158, "step": 386, "task_loss": 1.4878164529800415 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 6.129389762878418, "epoch": 0.33, "learning_rate": 1.6356720202874047e-05, "loss": 4.7028, "step": 387, "task_loss": 1.8630125522613525 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 4.083349227905273, "epoch": 0.33, "learning_rate": 1.6398985629754863e-05, "loss": 4.1317, "step": 388, "task_loss": 1.6951853036880493 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 4.8746232986450195, "epoch": 0.33, "learning_rate": 1.6441251056635673e-05, "loss": 4.896, "step": 389, "task_loss": 2.5312702655792236 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 4.8668646812438965, "epoch": 0.33, "learning_rate": 1.6483516483516486e-05, "loss": 4.7253, "step": 390, "task_loss": 1.8513497114181519 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 4.4477410316467285, "epoch": 0.33, "learning_rate": 1.6525781910397295e-05, "loss": 4.6591, "step": 391, "task_loss": 1.965864896774292 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 4.611985206604004, "epoch": 0.33, "learning_rate": 1.6568047337278108e-05, "loss": 4.7619, "step": 392, "task_loss": 1.4204081296920776 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 4.54319953918457, "epoch": 0.33, "learning_rate": 1.661031276415892e-05, "loss": 4.4114, "step": 393, "task_loss": 2.0045440196990967 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 5.020666122436523, "epoch": 0.33, "learning_rate": 1.665257819103973e-05, "loss": 4.181, "step": 394, "task_loss": 2.044067859649658 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 5.768197059631348, "epoch": 0.33, "learning_rate": 1.6694843617920543e-05, "loss": 5.1337, "step": 395, "task_loss": 1.7999083995819092 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 3.781773090362549, "epoch": 0.33, "learning_rate": 1.6737109044801352e-05, "loss": 4.3299, "step": 396, "task_loss": 1.8045899868011475 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 5.032170295715332, "epoch": 0.34, "learning_rate": 1.6779374471682165e-05, "loss": 4.7602, "step": 397, "task_loss": 1.921103596687317 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 4.617743492126465, "epoch": 0.34, "learning_rate": 1.6821639898562978e-05, "loss": 4.3861, "step": 398, "task_loss": 1.8125417232513428 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 4.445462226867676, "epoch": 0.34, "learning_rate": 1.6863905325443787e-05, "loss": 4.8476, "step": 399, "task_loss": 1.3715507984161377 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 4.970694541931152, "epoch": 0.34, "learning_rate": 1.69061707523246e-05, "loss": 4.6139, "step": 400, "task_loss": 1.889340877532959 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 4.383505821228027, "epoch": 0.34, "learning_rate": 1.694843617920541e-05, "loss": 4.0995, "step": 401, "task_loss": 2.118938446044922 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 6.329733371734619, "epoch": 0.34, "learning_rate": 1.6990701606086222e-05, "loss": 4.5507, "step": 402, "task_loss": 2.2250239849090576 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 4.189744472503662, "epoch": 0.34, "learning_rate": 1.7032967032967035e-05, "loss": 3.9479, "step": 403, "task_loss": 1.1580610275268555 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 3.4113504886627197, "epoch": 0.34, "learning_rate": 1.7075232459847844e-05, "loss": 4.067, "step": 404, "task_loss": 1.7253646850585938 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 4.405399322509766, "epoch": 0.34, "learning_rate": 1.7117497886728657e-05, "loss": 4.5987, "step": 405, "task_loss": 1.6780617237091064 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 3.539499282836914, "epoch": 0.34, "learning_rate": 1.7159763313609466e-05, "loss": 3.9092, "step": 406, "task_loss": 0.9126412272453308 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 3.492049217224121, "epoch": 0.34, "learning_rate": 1.7202028740490282e-05, "loss": 3.8372, "step": 407, "task_loss": 1.1993082761764526 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 4.659815788269043, "epoch": 0.34, "learning_rate": 1.7244294167371092e-05, "loss": 4.0019, "step": 408, "task_loss": 1.6589818000793457 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 4.182251930236816, "epoch": 0.35, "learning_rate": 1.72865595942519e-05, "loss": 4.2753, "step": 409, "task_loss": 1.3572207689285278 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 5.373161315917969, "epoch": 0.35, "learning_rate": 1.7328825021132714e-05, "loss": 4.525, "step": 410, "task_loss": 1.2126350402832031 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 4.914691925048828, "epoch": 0.35, "learning_rate": 1.7371090448013523e-05, "loss": 4.4747, "step": 411, "task_loss": 1.7921062707901 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 4.296452045440674, "epoch": 0.35, "learning_rate": 1.741335587489434e-05, "loss": 4.2379, "step": 412, "task_loss": 0.5651588439941406 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 4.935454845428467, "epoch": 0.35, "learning_rate": 1.745562130177515e-05, "loss": 3.808, "step": 413, "task_loss": 1.6267709732055664 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 4.497427463531494, "epoch": 0.35, "learning_rate": 1.749788672865596e-05, "loss": 4.0697, "step": 414, "task_loss": 1.7770404815673828 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 4.01531982421875, "epoch": 0.35, "learning_rate": 1.754015215553677e-05, "loss": 4.0516, "step": 415, "task_loss": 1.6030842065811157 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 4.800504207611084, "epoch": 0.35, "learning_rate": 1.7582417582417584e-05, "loss": 3.7625, "step": 416, "task_loss": 1.446210503578186 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 4.602176666259766, "epoch": 0.35, "learning_rate": 1.7624683009298397e-05, "loss": 4.5358, "step": 417, "task_loss": 1.539741039276123 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 4.226249694824219, "epoch": 0.35, "learning_rate": 1.7666948436179206e-05, "loss": 3.9055, "step": 418, "task_loss": 1.6847968101501465 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 3.887887477874756, "epoch": 0.35, "learning_rate": 1.770921386306002e-05, "loss": 3.7248, "step": 419, "task_loss": 1.6622414588928223 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 3.5326428413391113, "epoch": 0.35, "learning_rate": 1.7751479289940828e-05, "loss": 3.8524, "step": 420, "task_loss": 2.254150390625 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 3.487025022506714, "epoch": 0.36, "learning_rate": 1.779374471682164e-05, "loss": 3.5779, "step": 421, "task_loss": 1.3388928174972534 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 3.9131360054016113, "epoch": 0.36, "learning_rate": 1.7836010143702454e-05, "loss": 3.989, "step": 422, "task_loss": 1.0892082452774048 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 4.182522773742676, "epoch": 0.36, "learning_rate": 1.7878275570583263e-05, "loss": 3.3552, "step": 423, "task_loss": 1.460448980331421 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 4.647093296051025, "epoch": 0.36, "learning_rate": 1.7920540997464076e-05, "loss": 4.3733, "step": 424, "task_loss": 1.912064552307129 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 5.477087497711182, "epoch": 0.36, "learning_rate": 1.7962806424344885e-05, "loss": 3.876, "step": 425, "task_loss": 1.1669418811798096 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 3.6018176078796387, "epoch": 0.36, "learning_rate": 1.8005071851225698e-05, "loss": 4.1107, "step": 426, "task_loss": 2.482456922531128 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 3.34474515914917, "epoch": 0.36, "learning_rate": 1.804733727810651e-05, "loss": 3.5892, "step": 427, "task_loss": 2.2219133377075195 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 4.018124103546143, "epoch": 0.36, "learning_rate": 1.808960270498732e-05, "loss": 3.6816, "step": 428, "task_loss": 2.018838882446289 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 3.1224348545074463, "epoch": 0.36, "learning_rate": 1.8131868131868133e-05, "loss": 3.7753, "step": 429, "task_loss": 1.4040857553482056 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 4.59801721572876, "epoch": 0.36, "learning_rate": 1.8174133558748942e-05, "loss": 3.811, "step": 430, "task_loss": 2.4234955310821533 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 3.734684944152832, "epoch": 0.36, "learning_rate": 1.8216398985629755e-05, "loss": 3.7392, "step": 431, "task_loss": 2.027235984802246 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 4.421584606170654, "epoch": 0.36, "learning_rate": 1.8258664412510568e-05, "loss": 4.1532, "step": 432, "task_loss": 1.4803043603897095 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 3.6887450218200684, "epoch": 0.37, "learning_rate": 1.8300929839391377e-05, "loss": 3.3957, "step": 433, "task_loss": 1.236428141593933 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 4.0300445556640625, "epoch": 0.37, "learning_rate": 1.834319526627219e-05, "loss": 3.3946, "step": 434, "task_loss": 1.5627824068069458 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 3.120479106903076, "epoch": 0.37, "learning_rate": 1.8385460693153e-05, "loss": 3.5054, "step": 435, "task_loss": 0.7140458226203918 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 3.7074668407440186, "epoch": 0.37, "learning_rate": 1.8427726120033816e-05, "loss": 3.2998, "step": 436, "task_loss": 1.4515857696533203 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 3.678971290588379, "epoch": 0.37, "learning_rate": 1.8469991546914625e-05, "loss": 3.5096, "step": 437, "task_loss": 2.129225969314575 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 2.707834005355835, "epoch": 0.37, "learning_rate": 1.8512256973795435e-05, "loss": 3.2218, "step": 438, "task_loss": 1.6021143198013306 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 3.78014874458313, "epoch": 0.37, "learning_rate": 1.8554522400676247e-05, "loss": 3.3855, "step": 439, "task_loss": 1.9996271133422852 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 3.389688014984131, "epoch": 0.37, "learning_rate": 1.8596787827557057e-05, "loss": 3.2295, "step": 440, "task_loss": 1.7247347831726074 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 4.061910629272461, "epoch": 0.37, "learning_rate": 1.8639053254437873e-05, "loss": 3.1201, "step": 441, "task_loss": 1.2800053358078003 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 2.8365182876586914, "epoch": 0.37, "learning_rate": 1.8681318681318682e-05, "loss": 3.2807, "step": 442, "task_loss": 1.5826764106750488 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 3.844360589981079, "epoch": 0.37, "learning_rate": 1.8723584108199495e-05, "loss": 3.6967, "step": 443, "task_loss": 2.011652946472168 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 3.9784743785858154, "epoch": 0.38, "learning_rate": 1.8765849535080304e-05, "loss": 3.3886, "step": 444, "task_loss": 1.5634260177612305 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 2.5377941131591797, "epoch": 0.38, "learning_rate": 1.8808114961961117e-05, "loss": 3.3721, "step": 445, "task_loss": 1.0739930868148804 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 3.4861831665039062, "epoch": 0.38, "learning_rate": 1.885038038884193e-05, "loss": 3.2106, "step": 446, "task_loss": 1.2480708360671997 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 3.4460606575012207, "epoch": 0.38, "learning_rate": 1.889264581572274e-05, "loss": 3.1539, "step": 447, "task_loss": 1.3746886253356934 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 2.138279914855957, "epoch": 0.38, "learning_rate": 1.8934911242603552e-05, "loss": 2.9943, "step": 448, "task_loss": 1.261868953704834 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 3.0119004249572754, "epoch": 0.38, "learning_rate": 1.897717666948436e-05, "loss": 2.6361, "step": 449, "task_loss": 1.8958187103271484 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 3.7453014850616455, "epoch": 0.38, "learning_rate": 1.9019442096365174e-05, "loss": 3.4186, "step": 450, "task_loss": 1.3763338327407837 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 2.986863613128662, "epoch": 0.38, "learning_rate": 1.9061707523245987e-05, "loss": 3.0818, "step": 451, "task_loss": 1.090401291847229 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 3.598148822784424, "epoch": 0.38, "learning_rate": 1.9103972950126796e-05, "loss": 3.0639, "step": 452, "task_loss": 1.4045134782791138 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 3.9013113975524902, "epoch": 0.38, "learning_rate": 1.914623837700761e-05, "loss": 3.6508, "step": 453, "task_loss": 1.487033724784851 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 3.8980655670166016, "epoch": 0.38, "learning_rate": 1.918850380388842e-05, "loss": 3.3144, "step": 454, "task_loss": 1.6256424188613892 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 3.8106842041015625, "epoch": 0.38, "learning_rate": 1.923076923076923e-05, "loss": 3.1061, "step": 455, "task_loss": 1.6683250665664673 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 3.250868797302246, "epoch": 0.39, "learning_rate": 1.9273034657650044e-05, "loss": 2.9181, "step": 456, "task_loss": 0.7861428260803223 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 3.670706272125244, "epoch": 0.39, "learning_rate": 1.9315300084530854e-05, "loss": 3.3494, "step": 457, "task_loss": 1.307903528213501 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 3.914956569671631, "epoch": 0.39, "learning_rate": 1.9357565511411666e-05, "loss": 2.9449, "step": 458, "task_loss": 2.018502712249756 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 2.3373169898986816, "epoch": 0.39, "learning_rate": 1.9399830938292476e-05, "loss": 2.5605, "step": 459, "task_loss": 1.1175545454025269 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 2.3223793506622314, "epoch": 0.39, "learning_rate": 1.944209636517329e-05, "loss": 2.9028, "step": 460, "task_loss": 1.8081469535827637 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 3.100377082824707, "epoch": 0.39, "learning_rate": 1.94843617920541e-05, "loss": 2.918, "step": 461, "task_loss": 1.6999719142913818 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 3.4682412147521973, "epoch": 0.39, "learning_rate": 1.952662721893491e-05, "loss": 2.9514, "step": 462, "task_loss": 1.1028684377670288 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 3.052436351776123, "epoch": 0.39, "learning_rate": 1.9568892645815723e-05, "loss": 3.1347, "step": 463, "task_loss": 1.2945197820663452 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 3.003455638885498, "epoch": 0.39, "learning_rate": 1.9611158072696533e-05, "loss": 2.8966, "step": 464, "task_loss": 1.365273356437683 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 3.377332925796509, "epoch": 0.39, "learning_rate": 1.965342349957735e-05, "loss": 2.7946, "step": 465, "task_loss": 1.4409900903701782 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 3.902750015258789, "epoch": 0.39, "learning_rate": 1.969568892645816e-05, "loss": 3.2007, "step": 466, "task_loss": 1.468926191329956 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 3.440169095993042, "epoch": 0.39, "learning_rate": 1.9737954353338968e-05, "loss": 2.9493, "step": 467, "task_loss": 0.9817131757736206 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 4.182075023651123, "epoch": 0.4, "learning_rate": 1.978021978021978e-05, "loss": 3.2872, "step": 468, "task_loss": 1.5237255096435547 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 3.4946439266204834, "epoch": 0.4, "learning_rate": 1.9822485207100593e-05, "loss": 2.9127, "step": 469, "task_loss": 0.9216484427452087 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 3.713230609893799, "epoch": 0.4, "learning_rate": 1.9864750633981406e-05, "loss": 3.0136, "step": 470, "task_loss": 2.355800151824951 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 2.5385334491729736, "epoch": 0.4, "learning_rate": 1.9907016060862216e-05, "loss": 3.1428, "step": 471, "task_loss": 1.546563982963562 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 2.2123990058898926, "epoch": 0.4, "learning_rate": 1.994928148774303e-05, "loss": 2.6846, "step": 472, "task_loss": 1.3410879373550415 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 3.0110840797424316, "epoch": 0.4, "learning_rate": 1.9991546914623838e-05, "loss": 3.226, "step": 473, "task_loss": 1.1221226453781128 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 2.2938060760498047, "epoch": 0.4, "learning_rate": 2.003381234150465e-05, "loss": 2.4607, "step": 474, "task_loss": 1.049346923828125 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 2.9862170219421387, "epoch": 0.4, "learning_rate": 2.0076077768385463e-05, "loss": 3.0316, "step": 475, "task_loss": 0.8953298926353455 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 3.3019683361053467, "epoch": 0.4, "learning_rate": 2.0118343195266273e-05, "loss": 3.089, "step": 476, "task_loss": 1.886934757232666 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 3.573516845703125, "epoch": 0.4, "learning_rate": 2.0160608622147085e-05, "loss": 3.0743, "step": 477, "task_loss": 1.2565950155258179 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 2.333932638168335, "epoch": 0.4, "learning_rate": 2.0202874049027895e-05, "loss": 2.9761, "step": 478, "task_loss": 2.347757577896118 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 2.129934310913086, "epoch": 0.4, "learning_rate": 2.0245139475908708e-05, "loss": 2.3854, "step": 479, "task_loss": 1.7285182476043701 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 3.578687906265259, "epoch": 0.41, "learning_rate": 2.028740490278952e-05, "loss": 2.871, "step": 480, "task_loss": 1.4707187414169312 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 3.1081323623657227, "epoch": 0.41, "learning_rate": 2.032967032967033e-05, "loss": 3.203, "step": 481, "task_loss": 2.3341994285583496 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 2.69724702835083, "epoch": 0.41, "learning_rate": 2.0371935756551143e-05, "loss": 2.5809, "step": 482, "task_loss": 1.1442439556121826 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 3.2646069526672363, "epoch": 0.41, "learning_rate": 2.0414201183431952e-05, "loss": 2.828, "step": 483, "task_loss": 1.5372252464294434 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 2.8411061763763428, "epoch": 0.41, "learning_rate": 2.0456466610312765e-05, "loss": 2.7103, "step": 484, "task_loss": 1.1392698287963867 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 2.7103962898254395, "epoch": 0.41, "learning_rate": 2.0498732037193578e-05, "loss": 3.2071, "step": 485, "task_loss": 1.714789867401123 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.888676643371582, "epoch": 0.41, "learning_rate": 2.0540997464074387e-05, "loss": 2.3974, "step": 486, "task_loss": 1.2281129360198975 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 2.6795501708984375, "epoch": 0.41, "learning_rate": 2.05832628909552e-05, "loss": 2.5494, "step": 487, "task_loss": 1.2531245946884155 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 3.906757354736328, "epoch": 0.41, "learning_rate": 2.062552831783601e-05, "loss": 2.8051, "step": 488, "task_loss": 0.8815559148788452 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.9760488271713257, "epoch": 0.41, "learning_rate": 2.0667793744716822e-05, "loss": 2.7453, "step": 489, "task_loss": 0.9677852392196655 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 2.6790108680725098, "epoch": 0.41, "learning_rate": 2.0710059171597635e-05, "loss": 2.5883, "step": 490, "task_loss": 0.6970438957214355 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 2.4974966049194336, "epoch": 0.41, "learning_rate": 2.0752324598478444e-05, "loss": 2.8075, "step": 491, "task_loss": 1.277848720550537 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 2.780791997909546, "epoch": 0.42, "learning_rate": 2.0794590025359257e-05, "loss": 2.772, "step": 492, "task_loss": 1.3234076499938965 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 3.041388511657715, "epoch": 0.42, "learning_rate": 2.083685545224007e-05, "loss": 2.5044, "step": 493, "task_loss": 1.5247846841812134 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 3.6011877059936523, "epoch": 0.42, "learning_rate": 2.0879120879120882e-05, "loss": 2.9537, "step": 494, "task_loss": 1.1916651725769043 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 2.6453027725219727, "epoch": 0.42, "learning_rate": 2.0921386306001692e-05, "loss": 2.433, "step": 495, "task_loss": 1.9161524772644043 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 3.4757633209228516, "epoch": 0.42, "learning_rate": 2.09636517328825e-05, "loss": 2.5843, "step": 496, "task_loss": 1.6245217323303223 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 3.0797581672668457, "epoch": 0.42, "learning_rate": 2.1005917159763314e-05, "loss": 2.9195, "step": 497, "task_loss": 1.1057181358337402 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 3.042912483215332, "epoch": 0.42, "learning_rate": 2.1048182586644127e-05, "loss": 2.7021, "step": 498, "task_loss": 0.8674214482307434 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.8304779529571533, "epoch": 0.42, "learning_rate": 2.109044801352494e-05, "loss": 2.0921, "step": 499, "task_loss": 0.7738440632820129 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 2.1748499870300293, "epoch": 0.42, "learning_rate": 2.113271344040575e-05, "loss": 2.2676, "step": 500, "task_loss": 0.4352116286754608 }, { "epoch": 0.42, "eval_accuracy": 0.7947326732673268, "eval_loss": 2.108656167984009, "eval_runtime": 226.177, "eval_samples_per_second": 111.638, "eval_steps_per_second": 0.875, "step": 500 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 2.1833982467651367, "epoch": 0.42, "learning_rate": 2.117497886728656e-05, "loss": 2.7708, "step": 501, "task_loss": 1.7501763105392456 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 2.2712764739990234, "epoch": 0.42, "learning_rate": 2.121724429416737e-05, "loss": 2.39, "step": 502, "task_loss": 0.7685309648513794 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 2.638744592666626, "epoch": 0.42, "learning_rate": 2.1259509721048184e-05, "loss": 2.5966, "step": 503, "task_loss": 0.982324481010437 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 2.0377399921417236, "epoch": 0.43, "learning_rate": 2.1301775147928997e-05, "loss": 2.4878, "step": 504, "task_loss": 1.7711303234100342 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 2.876831531524658, "epoch": 0.43, "learning_rate": 2.1344040574809806e-05, "loss": 2.2551, "step": 505, "task_loss": 1.4975056648254395 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.882049798965454, "epoch": 0.43, "learning_rate": 2.138630600169062e-05, "loss": 1.8603, "step": 506, "task_loss": 1.1373587846755981 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.83182692527771, "epoch": 0.43, "learning_rate": 2.1428571428571428e-05, "loss": 2.2318, "step": 507, "task_loss": 0.48504412174224854 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 2.381467819213867, "epoch": 0.43, "learning_rate": 2.147083685545224e-05, "loss": 2.3466, "step": 508, "task_loss": 0.5970094203948975 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 2.466721296310425, "epoch": 0.43, "learning_rate": 2.1513102282333054e-05, "loss": 2.428, "step": 509, "task_loss": 1.0185757875442505 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 2.970189332962036, "epoch": 0.43, "learning_rate": 2.1555367709213863e-05, "loss": 2.5189, "step": 510, "task_loss": 1.4654353857040405 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.7572011947631836, "epoch": 0.43, "learning_rate": 2.1597633136094676e-05, "loss": 2.225, "step": 511, "task_loss": 1.4214348793029785 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 2.691232442855835, "epoch": 0.43, "learning_rate": 2.1639898562975485e-05, "loss": 2.4251, "step": 512, "task_loss": 1.1146931648254395 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 3.0291330814361572, "epoch": 0.43, "learning_rate": 2.1682163989856298e-05, "loss": 2.7257, "step": 513, "task_loss": 0.8977842926979065 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 2.302755832672119, "epoch": 0.43, "learning_rate": 2.172442941673711e-05, "loss": 2.2075, "step": 514, "task_loss": 1.5624113082885742 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 2.222282648086548, "epoch": 0.44, "learning_rate": 2.176669484361792e-05, "loss": 2.5413, "step": 515, "task_loss": 1.5128321647644043 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 2.756943941116333, "epoch": 0.44, "learning_rate": 2.1808960270498733e-05, "loss": 2.1, "step": 516, "task_loss": 1.3281652927398682 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 2.5485689640045166, "epoch": 0.44, "learning_rate": 2.1851225697379546e-05, "loss": 2.3332, "step": 517, "task_loss": 2.0654587745666504 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 2.6797261238098145, "epoch": 0.44, "learning_rate": 2.189349112426036e-05, "loss": 2.0501, "step": 518, "task_loss": 1.215960144996643 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 2.395606517791748, "epoch": 0.44, "learning_rate": 2.1935756551141168e-05, "loss": 2.1123, "step": 519, "task_loss": 1.6138554811477661 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 2.91221284866333, "epoch": 0.44, "learning_rate": 2.1978021978021977e-05, "loss": 2.4037, "step": 520, "task_loss": 0.9587717652320862 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 3.0148673057556152, "epoch": 0.44, "learning_rate": 2.202028740490279e-05, "loss": 2.6213, "step": 521, "task_loss": 1.174586296081543 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 2.454350471496582, "epoch": 0.44, "learning_rate": 2.2062552831783603e-05, "loss": 2.4022, "step": 522, "task_loss": 0.8328395485877991 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 2.1828932762145996, "epoch": 0.44, "learning_rate": 2.2104818258664416e-05, "loss": 2.0248, "step": 523, "task_loss": 1.0991419553756714 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 2.8340721130371094, "epoch": 0.44, "learning_rate": 2.2147083685545225e-05, "loss": 2.082, "step": 524, "task_loss": 0.8688100576400757 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 2.5442023277282715, "epoch": 0.44, "learning_rate": 2.2189349112426034e-05, "loss": 2.2399, "step": 525, "task_loss": 1.4538289308547974 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 2.400397539138794, "epoch": 0.44, "learning_rate": 2.2231614539306847e-05, "loss": 2.3214, "step": 526, "task_loss": 1.0998371839523315 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 2.188443422317505, "epoch": 0.45, "learning_rate": 2.227387996618766e-05, "loss": 2.0528, "step": 527, "task_loss": 1.5736502408981323 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 2.7074997425079346, "epoch": 0.45, "learning_rate": 2.2316145393068473e-05, "loss": 2.27, "step": 528, "task_loss": 1.7446556091308594 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 2.664313793182373, "epoch": 0.45, "learning_rate": 2.2358410819949282e-05, "loss": 2.2817, "step": 529, "task_loss": 1.2587699890136719 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 2.640709638595581, "epoch": 0.45, "learning_rate": 2.2400676246830095e-05, "loss": 2.2473, "step": 530, "task_loss": 1.3915153741836548 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 2.774782657623291, "epoch": 0.45, "learning_rate": 2.2442941673710904e-05, "loss": 2.2367, "step": 531, "task_loss": 1.3553184270858765 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 2.0635666847229004, "epoch": 0.45, "learning_rate": 2.2485207100591717e-05, "loss": 2.0778, "step": 532, "task_loss": 1.5550239086151123 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 2.6943728923797607, "epoch": 0.45, "learning_rate": 2.252747252747253e-05, "loss": 2.3212, "step": 533, "task_loss": 1.2910689115524292 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.7708451747894287, "epoch": 0.45, "learning_rate": 2.256973795435334e-05, "loss": 2.1486, "step": 534, "task_loss": 1.4383989572525024 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 2.9864957332611084, "epoch": 0.45, "learning_rate": 2.2612003381234152e-05, "loss": 2.4786, "step": 535, "task_loss": 1.2517355680465698 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 3.1722757816314697, "epoch": 0.45, "learning_rate": 2.265426880811496e-05, "loss": 2.1645, "step": 536, "task_loss": 1.5140818357467651 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.6191139221191406, "epoch": 0.45, "learning_rate": 2.2696534234995774e-05, "loss": 2.1956, "step": 537, "task_loss": 1.139264702796936 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 3.1866304874420166, "epoch": 0.45, "learning_rate": 2.2738799661876587e-05, "loss": 2.6427, "step": 538, "task_loss": 1.6369730234146118 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 2.068495035171509, "epoch": 0.46, "learning_rate": 2.2781065088757396e-05, "loss": 1.8554, "step": 539, "task_loss": 1.1220762729644775 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 2.272372245788574, "epoch": 0.46, "learning_rate": 2.282333051563821e-05, "loss": 2.1855, "step": 540, "task_loss": 0.9333959817886353 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.8025707006454468, "epoch": 0.46, "learning_rate": 2.286559594251902e-05, "loss": 2.2615, "step": 541, "task_loss": 1.338422417640686 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.5329136848449707, "epoch": 0.46, "learning_rate": 2.290786136939983e-05, "loss": 1.9944, "step": 542, "task_loss": 0.9137334823608398 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.7770957946777344, "epoch": 0.46, "learning_rate": 2.2950126796280644e-05, "loss": 1.8463, "step": 543, "task_loss": 0.99053555727005 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 2.054398536682129, "epoch": 0.46, "learning_rate": 2.2992392223161454e-05, "loss": 2.2633, "step": 544, "task_loss": 0.9043038487434387 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 2.6008200645446777, "epoch": 0.46, "learning_rate": 2.3034657650042266e-05, "loss": 2.0889, "step": 545, "task_loss": 1.2475757598876953 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 2.1617512702941895, "epoch": 0.46, "learning_rate": 2.307692307692308e-05, "loss": 2.1881, "step": 546, "task_loss": 0.4580702483654022 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 2.823267936706543, "epoch": 0.46, "learning_rate": 2.3119188503803892e-05, "loss": 2.0982, "step": 547, "task_loss": 1.370139241218567 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 2.469528913497925, "epoch": 0.46, "learning_rate": 2.31614539306847e-05, "loss": 2.1276, "step": 548, "task_loss": 1.1134635210037231 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.733099341392517, "epoch": 0.46, "learning_rate": 2.320371935756551e-05, "loss": 1.9433, "step": 549, "task_loss": 1.7295442819595337 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 2.6101553440093994, "epoch": 0.46, "learning_rate": 2.3245984784446323e-05, "loss": 2.241, "step": 550, "task_loss": 1.6053123474121094 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.630253791809082, "epoch": 0.47, "learning_rate": 2.3288250211327136e-05, "loss": 1.5826, "step": 551, "task_loss": 1.0710700750350952 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.6688848733901978, "epoch": 0.47, "learning_rate": 2.333051563820795e-05, "loss": 1.9808, "step": 552, "task_loss": 1.5891927480697632 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.8271673917770386, "epoch": 0.47, "learning_rate": 2.337278106508876e-05, "loss": 1.8334, "step": 553, "task_loss": 1.4577524662017822 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.8559801578521729, "epoch": 0.47, "learning_rate": 2.341504649196957e-05, "loss": 1.8992, "step": 554, "task_loss": 0.7260839939117432 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 2.263404369354248, "epoch": 0.47, "learning_rate": 2.345731191885038e-05, "loss": 2.0486, "step": 555, "task_loss": 1.0958497524261475 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 2.4724626541137695, "epoch": 0.47, "learning_rate": 2.3499577345731193e-05, "loss": 1.913, "step": 556, "task_loss": 0.9496216773986816 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 2.7800204753875732, "epoch": 0.47, "learning_rate": 2.3541842772612006e-05, "loss": 1.7483, "step": 557, "task_loss": 2.0007011890411377 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 2.151249885559082, "epoch": 0.47, "learning_rate": 2.3584108199492815e-05, "loss": 1.8281, "step": 558, "task_loss": 0.9958018064498901 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 2.1911585330963135, "epoch": 0.47, "learning_rate": 2.3626373626373628e-05, "loss": 2.2765, "step": 559, "task_loss": 0.5404365062713623 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.868894100189209, "epoch": 0.47, "learning_rate": 2.3668639053254438e-05, "loss": 1.9481, "step": 560, "task_loss": 1.770961880683899 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.6021733283996582, "epoch": 0.47, "learning_rate": 2.371090448013525e-05, "loss": 1.632, "step": 561, "task_loss": 0.7386936545372009 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 2.554785966873169, "epoch": 0.47, "learning_rate": 2.3753169907016063e-05, "loss": 2.2806, "step": 562, "task_loss": 1.2048391103744507 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.723888874053955, "epoch": 0.48, "learning_rate": 2.3795435333896873e-05, "loss": 1.9438, "step": 563, "task_loss": 1.2216720581054688 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 2.2916860580444336, "epoch": 0.48, "learning_rate": 2.3837700760777685e-05, "loss": 1.6872, "step": 564, "task_loss": 1.7146090269088745 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 2.2390878200531006, "epoch": 0.48, "learning_rate": 2.3879966187658495e-05, "loss": 1.9545, "step": 565, "task_loss": 1.2985213994979858 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.8307461738586426, "epoch": 0.48, "learning_rate": 2.3922231614539308e-05, "loss": 1.8333, "step": 566, "task_loss": 0.6718560457229614 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 2.2520875930786133, "epoch": 0.48, "learning_rate": 2.396449704142012e-05, "loss": 2.1959, "step": 567, "task_loss": 1.5362135171890259 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.6778745651245117, "epoch": 0.48, "learning_rate": 2.400676246830093e-05, "loss": 1.879, "step": 568, "task_loss": 0.5857014656066895 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 2.2357287406921387, "epoch": 0.48, "learning_rate": 2.4049027895181742e-05, "loss": 2.2874, "step": 569, "task_loss": 1.520495891571045 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 2.042743682861328, "epoch": 0.48, "learning_rate": 2.4091293322062555e-05, "loss": 1.8701, "step": 570, "task_loss": 1.0886714458465576 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.3700690269470215, "epoch": 0.48, "learning_rate": 2.4133558748943365e-05, "loss": 1.7621, "step": 571, "task_loss": 0.702406644821167 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.9486669301986694, "epoch": 0.48, "learning_rate": 2.4175824175824177e-05, "loss": 1.544, "step": 572, "task_loss": 0.9646759033203125 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 2.1959726810455322, "epoch": 0.48, "learning_rate": 2.4218089602704987e-05, "loss": 2.1131, "step": 573, "task_loss": 1.5761204957962036 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 2.38568115234375, "epoch": 0.48, "learning_rate": 2.42603550295858e-05, "loss": 2.0043, "step": 574, "task_loss": 1.259171962738037 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 2.0574824810028076, "epoch": 0.49, "learning_rate": 2.4302620456466612e-05, "loss": 1.8995, "step": 575, "task_loss": 0.9315642714500427 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 2.021869659423828, "epoch": 0.49, "learning_rate": 2.4344885883347425e-05, "loss": 1.9897, "step": 576, "task_loss": 0.9874861836433411 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.2956435680389404, "epoch": 0.49, "learning_rate": 2.4387151310228235e-05, "loss": 1.8924, "step": 577, "task_loss": 1.2937473058700562 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 2.6407103538513184, "epoch": 0.49, "learning_rate": 2.4429416737109044e-05, "loss": 2.1338, "step": 578, "task_loss": 1.623302936553955 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.8056650161743164, "epoch": 0.49, "learning_rate": 2.4471682163989857e-05, "loss": 1.8035, "step": 579, "task_loss": 1.652133584022522 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.7478270530700684, "epoch": 0.49, "learning_rate": 2.451394759087067e-05, "loss": 2.0642, "step": 580, "task_loss": 1.1515332460403442 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 2.2086875438690186, "epoch": 0.49, "learning_rate": 2.4556213017751482e-05, "loss": 1.7182, "step": 581, "task_loss": 0.7725767493247986 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.808950424194336, "epoch": 0.49, "learning_rate": 2.459847844463229e-05, "loss": 1.8926, "step": 582, "task_loss": 1.5882987976074219 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.7286043167114258, "epoch": 0.49, "learning_rate": 2.4640743871513104e-05, "loss": 1.9039, "step": 583, "task_loss": 0.6680277585983276 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 2.0558266639709473, "epoch": 0.49, "learning_rate": 2.4683009298393914e-05, "loss": 2.1621, "step": 584, "task_loss": 1.1184676885604858 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.9074819087982178, "epoch": 0.49, "learning_rate": 2.4725274725274727e-05, "loss": 1.9615, "step": 585, "task_loss": 1.5058660507202148 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.6721770763397217, "epoch": 0.5, "learning_rate": 2.476754015215554e-05, "loss": 1.5138, "step": 586, "task_loss": 0.29819393157958984 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.4543733596801758, "epoch": 0.5, "learning_rate": 2.480980557903635e-05, "loss": 1.6232, "step": 587, "task_loss": 1.0513654947280884 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.9924726486206055, "epoch": 0.5, "learning_rate": 2.485207100591716e-05, "loss": 2.155, "step": 588, "task_loss": 2.361231565475464 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 2.6872291564941406, "epoch": 0.5, "learning_rate": 2.489433643279797e-05, "loss": 2.0237, "step": 589, "task_loss": 1.0504076480865479 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.7158302068710327, "epoch": 0.5, "learning_rate": 2.4936601859678784e-05, "loss": 1.7117, "step": 590, "task_loss": 1.2631242275238037 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.4364511966705322, "epoch": 0.5, "learning_rate": 2.4978867286559597e-05, "loss": 1.4404, "step": 591, "task_loss": 0.7790091633796692 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 2.1071834564208984, "epoch": 0.5, "learning_rate": 2.502113271344041e-05, "loss": 1.9508, "step": 592, "task_loss": 0.7317472100257874 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 2.250026226043701, "epoch": 0.5, "learning_rate": 2.506339814032122e-05, "loss": 1.7792, "step": 593, "task_loss": 1.3049629926681519 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.3768805265426636, "epoch": 0.5, "learning_rate": 2.510566356720203e-05, "loss": 1.4023, "step": 594, "task_loss": 0.7010613679885864 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.9267685413360596, "epoch": 0.5, "learning_rate": 2.514792899408284e-05, "loss": 1.5329, "step": 595, "task_loss": 0.7476761937141418 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.7090365886688232, "epoch": 0.5, "learning_rate": 2.5190194420963654e-05, "loss": 1.6372, "step": 596, "task_loss": 1.5144426822662354 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.5731834173202515, "epoch": 0.5, "learning_rate": 2.5232459847844463e-05, "loss": 1.6874, "step": 597, "task_loss": 0.6311089992523193 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 2.3774452209472656, "epoch": 0.51, "learning_rate": 2.5274725274725276e-05, "loss": 2.1591, "step": 598, "task_loss": 2.3305716514587402 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.4071381092071533, "epoch": 0.51, "learning_rate": 2.5316990701606085e-05, "loss": 1.5448, "step": 599, "task_loss": 1.1393566131591797 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.2116893529891968, "epoch": 0.51, "learning_rate": 2.5359256128486898e-05, "loss": 1.7468, "step": 600, "task_loss": 1.1680995225906372 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.7497591972351074, "epoch": 0.51, "learning_rate": 2.5401521555367707e-05, "loss": 2.2182, "step": 601, "task_loss": 1.7593193054199219 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.7641654014587402, "epoch": 0.51, "learning_rate": 2.5443786982248524e-05, "loss": 1.9017, "step": 602, "task_loss": 1.5303020477294922 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.8423734903335571, "epoch": 0.51, "learning_rate": 2.5486052409129336e-05, "loss": 1.5398, "step": 603, "task_loss": 0.9125271439552307 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.8104195594787598, "epoch": 0.51, "learning_rate": 2.5528317836010146e-05, "loss": 1.4813, "step": 604, "task_loss": 1.417677402496338 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 2.104424238204956, "epoch": 0.51, "learning_rate": 2.557058326289096e-05, "loss": 1.5896, "step": 605, "task_loss": 1.5117765665054321 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.9082846641540527, "epoch": 0.51, "learning_rate": 2.5612848689771768e-05, "loss": 1.4617, "step": 606, "task_loss": 0.7563328146934509 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 2.508881092071533, "epoch": 0.51, "learning_rate": 2.5655114116652577e-05, "loss": 2.011, "step": 607, "task_loss": 1.1263551712036133 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.774326205253601, "epoch": 0.51, "learning_rate": 2.569737954353339e-05, "loss": 1.7182, "step": 608, "task_loss": 1.0811508893966675 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.4354971647262573, "epoch": 0.51, "learning_rate": 2.57396449704142e-05, "loss": 1.7321, "step": 609, "task_loss": 1.1322211027145386 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 2.1755032539367676, "epoch": 0.52, "learning_rate": 2.5781910397295012e-05, "loss": 1.5819, "step": 610, "task_loss": 1.032667636871338 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.321526050567627, "epoch": 0.52, "learning_rate": 2.582417582417583e-05, "loss": 1.3548, "step": 611, "task_loss": 1.0109859704971313 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.291067123413086, "epoch": 0.52, "learning_rate": 2.5866441251056638e-05, "loss": 1.5062, "step": 612, "task_loss": 0.7999576926231384 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.4695544242858887, "epoch": 0.52, "learning_rate": 2.590870667793745e-05, "loss": 1.7557, "step": 613, "task_loss": 1.4489662647247314 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.4045896530151367, "epoch": 0.52, "learning_rate": 2.595097210481826e-05, "loss": 1.8991, "step": 614, "task_loss": 0.850382387638092 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.591586947441101, "epoch": 0.52, "learning_rate": 2.5993237531699073e-05, "loss": 1.7515, "step": 615, "task_loss": 2.0645599365234375 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.9465082883834839, "epoch": 0.52, "learning_rate": 2.6035502958579882e-05, "loss": 1.8586, "step": 616, "task_loss": 1.6432621479034424 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.8321236371994019, "epoch": 0.52, "learning_rate": 2.6077768385460695e-05, "loss": 1.6317, "step": 617, "task_loss": 2.2543234825134277 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.9009987115859985, "epoch": 0.52, "learning_rate": 2.6120033812341504e-05, "loss": 1.4536, "step": 618, "task_loss": 1.0427546501159668 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.8423457145690918, "epoch": 0.52, "learning_rate": 2.6162299239222317e-05, "loss": 1.6323, "step": 619, "task_loss": 0.9704166650772095 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.8022714853286743, "epoch": 0.52, "learning_rate": 2.6204564666103126e-05, "loss": 1.5321, "step": 620, "task_loss": 1.181983232498169 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.6737985610961914, "epoch": 0.52, "learning_rate": 2.6246830092983943e-05, "loss": 1.7455, "step": 621, "task_loss": 1.2462942600250244 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.998228907585144, "epoch": 0.53, "learning_rate": 2.6289095519864755e-05, "loss": 1.5497, "step": 622, "task_loss": 1.9189759492874146 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.310429573059082, "epoch": 0.53, "learning_rate": 2.6331360946745565e-05, "loss": 1.3369, "step": 623, "task_loss": 0.6672812700271606 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 2.1305723190307617, "epoch": 0.53, "learning_rate": 2.6373626373626374e-05, "loss": 1.6624, "step": 624, "task_loss": 1.0692880153656006 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.6739581823349, "epoch": 0.53, "learning_rate": 2.6415891800507187e-05, "loss": 1.8958, "step": 625, "task_loss": 1.1022433042526245 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 2.4233789443969727, "epoch": 0.53, "learning_rate": 2.6458157227387996e-05, "loss": 1.8391, "step": 626, "task_loss": 1.3821601867675781 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.7215943336486816, "epoch": 0.53, "learning_rate": 2.650042265426881e-05, "loss": 1.5655, "step": 627, "task_loss": 0.9336962699890137 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.641430377960205, "epoch": 0.53, "learning_rate": 2.654268808114962e-05, "loss": 1.5572, "step": 628, "task_loss": 1.1537126302719116 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.4905693531036377, "epoch": 0.53, "learning_rate": 2.658495350803043e-05, "loss": 1.1736, "step": 629, "task_loss": 1.3647817373275757 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.776768445968628, "epoch": 0.53, "learning_rate": 2.6627218934911247e-05, "loss": 1.5419, "step": 630, "task_loss": 1.5495095252990723 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 2.119025230407715, "epoch": 0.53, "learning_rate": 2.6669484361792057e-05, "loss": 1.7097, "step": 631, "task_loss": 0.6255828738212585 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 2.060655117034912, "epoch": 0.53, "learning_rate": 2.671174978867287e-05, "loss": 1.6324, "step": 632, "task_loss": 1.9559584856033325 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.8026323318481445, "epoch": 0.53, "learning_rate": 2.675401521555368e-05, "loss": 1.3785, "step": 633, "task_loss": 0.9295580983161926 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 2.2322726249694824, "epoch": 0.54, "learning_rate": 2.6796280642434492e-05, "loss": 1.7693, "step": 634, "task_loss": 1.053109884262085 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.1831504106521606, "epoch": 0.54, "learning_rate": 2.68385460693153e-05, "loss": 1.3001, "step": 635, "task_loss": 0.5708752870559692 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.6532008647918701, "epoch": 0.54, "learning_rate": 2.688081149619611e-05, "loss": 1.6953, "step": 636, "task_loss": 1.8451604843139648 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.2908958196640015, "epoch": 0.54, "learning_rate": 2.6923076923076923e-05, "loss": 1.5398, "step": 637, "task_loss": 0.7879374623298645 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.3271689414978027, "epoch": 0.54, "learning_rate": 2.6965342349957733e-05, "loss": 1.4815, "step": 638, "task_loss": 1.635392665863037 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.4123780727386475, "epoch": 0.54, "learning_rate": 2.7007607776838545e-05, "loss": 1.8863, "step": 639, "task_loss": 0.2529791593551636 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 2.9431493282318115, "epoch": 0.54, "learning_rate": 2.704987320371936e-05, "loss": 1.9035, "step": 640, "task_loss": 1.4493820667266846 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.060360074043274, "epoch": 0.54, "learning_rate": 2.709213863060017e-05, "loss": 1.5586, "step": 641, "task_loss": 0.9246127605438232 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.6228567361831665, "epoch": 0.54, "learning_rate": 2.7134404057480984e-05, "loss": 1.7603, "step": 642, "task_loss": 1.724980354309082 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 2.020940065383911, "epoch": 0.54, "learning_rate": 2.7176669484361793e-05, "loss": 1.8246, "step": 643, "task_loss": 1.4467122554779053 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.1339294910430908, "epoch": 0.54, "learning_rate": 2.7218934911242606e-05, "loss": 1.6619, "step": 644, "task_loss": 0.595633864402771 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.6338285207748413, "epoch": 0.54, "learning_rate": 2.7261200338123415e-05, "loss": 1.6148, "step": 645, "task_loss": 0.931357204914093 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.5364899635314941, "epoch": 0.55, "learning_rate": 2.7303465765004228e-05, "loss": 1.5055, "step": 646, "task_loss": 0.731842041015625 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.7353875637054443, "epoch": 0.55, "learning_rate": 2.7345731191885038e-05, "loss": 1.2771, "step": 647, "task_loss": 1.078820824623108 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.5952110290527344, "epoch": 0.55, "learning_rate": 2.738799661876585e-05, "loss": 1.6303, "step": 648, "task_loss": 0.7135847806930542 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.6236639022827148, "epoch": 0.55, "learning_rate": 2.743026204564666e-05, "loss": 1.2467, "step": 649, "task_loss": 0.8168407678604126 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.8265674114227295, "epoch": 0.55, "learning_rate": 2.7472527472527476e-05, "loss": 1.328, "step": 650, "task_loss": 1.279393196105957 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.9181575775146484, "epoch": 0.55, "learning_rate": 2.751479289940829e-05, "loss": 1.808, "step": 651, "task_loss": 1.327600121498108 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.4949519634246826, "epoch": 0.55, "learning_rate": 2.7557058326289098e-05, "loss": 1.078, "step": 652, "task_loss": 0.9232909083366394 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.5886056423187256, "epoch": 0.55, "learning_rate": 2.7599323753169907e-05, "loss": 1.3246, "step": 653, "task_loss": 0.3847731649875641 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.26932692527771, "epoch": 0.55, "learning_rate": 2.764158918005072e-05, "loss": 1.1954, "step": 654, "task_loss": 0.9609280824661255 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 2.102677583694458, "epoch": 0.55, "learning_rate": 2.768385460693153e-05, "loss": 1.4832, "step": 655, "task_loss": 0.8220967650413513 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.7277220487594604, "epoch": 0.55, "learning_rate": 2.7726120033812342e-05, "loss": 1.522, "step": 656, "task_loss": 1.0032366514205933 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.0998575687408447, "epoch": 0.56, "learning_rate": 2.7768385460693152e-05, "loss": 1.2439, "step": 657, "task_loss": 0.7112149000167847 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.5570416450500488, "epoch": 0.56, "learning_rate": 2.7810650887573965e-05, "loss": 1.5928, "step": 658, "task_loss": 0.7085965275764465 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.4043692350387573, "epoch": 0.56, "learning_rate": 2.785291631445478e-05, "loss": 1.5282, "step": 659, "task_loss": 1.1843066215515137 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.8886358737945557, "epoch": 0.56, "learning_rate": 2.789518174133559e-05, "loss": 1.8172, "step": 660, "task_loss": 0.597008466720581 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.790836215019226, "epoch": 0.56, "learning_rate": 2.7937447168216403e-05, "loss": 1.711, "step": 661, "task_loss": 0.9670142531394958 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.763425350189209, "epoch": 0.56, "learning_rate": 2.7979712595097212e-05, "loss": 1.5536, "step": 662, "task_loss": 1.916986346244812 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.6089192628860474, "epoch": 0.56, "learning_rate": 2.8021978021978025e-05, "loss": 1.5651, "step": 663, "task_loss": 0.8449296355247498 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.6258020401000977, "epoch": 0.56, "learning_rate": 2.8064243448858834e-05, "loss": 1.6864, "step": 664, "task_loss": 0.9355388879776001 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 2.0082406997680664, "epoch": 0.56, "learning_rate": 2.8106508875739644e-05, "loss": 1.4254, "step": 665, "task_loss": 1.5960066318511963 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.8274633884429932, "epoch": 0.56, "learning_rate": 2.8148774302620457e-05, "loss": 1.3565, "step": 666, "task_loss": 1.0102416276931763 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.732003092765808, "epoch": 0.56, "learning_rate": 2.8191039729501266e-05, "loss": 1.3596, "step": 667, "task_loss": 1.3154973983764648 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.780278205871582, "epoch": 0.56, "learning_rate": 2.823330515638208e-05, "loss": 1.5606, "step": 668, "task_loss": 1.7912262678146362 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.5741322040557861, "epoch": 0.57, "learning_rate": 2.8275570583262895e-05, "loss": 1.4136, "step": 669, "task_loss": 1.7995178699493408 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.7928142547607422, "epoch": 0.57, "learning_rate": 2.8317836010143704e-05, "loss": 1.5902, "step": 670, "task_loss": 1.5148922204971313 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.6320691108703613, "epoch": 0.57, "learning_rate": 2.8360101437024517e-05, "loss": 1.8178, "step": 671, "task_loss": 1.483723521232605 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.1542911529541016, "epoch": 0.57, "learning_rate": 2.8402366863905327e-05, "loss": 1.4133, "step": 672, "task_loss": 0.6468456983566284 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.7527374029159546, "epoch": 0.57, "learning_rate": 2.844463229078614e-05, "loss": 1.4153, "step": 673, "task_loss": 1.983681082725525 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.0837173461914062, "epoch": 0.57, "learning_rate": 2.848689771766695e-05, "loss": 1.3309, "step": 674, "task_loss": 0.7219658493995667 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 2.297276020050049, "epoch": 0.57, "learning_rate": 2.852916314454776e-05, "loss": 1.7485, "step": 675, "task_loss": 1.6987696886062622 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.675417184829712, "epoch": 0.57, "learning_rate": 2.857142857142857e-05, "loss": 1.6139, "step": 676, "task_loss": 0.8685016632080078 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.4889869689941406, "epoch": 0.57, "learning_rate": 2.8613693998309384e-05, "loss": 1.454, "step": 677, "task_loss": 0.6011234521865845 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.4284563064575195, "epoch": 0.57, "learning_rate": 2.8655959425190193e-05, "loss": 1.3535, "step": 678, "task_loss": 0.5318889617919922 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.252799153327942, "epoch": 0.57, "learning_rate": 2.869822485207101e-05, "loss": 1.405, "step": 679, "task_loss": 1.4317924976348877 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.4981613159179688, "epoch": 0.57, "learning_rate": 2.8740490278951822e-05, "loss": 1.3115, "step": 680, "task_loss": 1.30126953125 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 2.060190439224243, "epoch": 0.58, "learning_rate": 2.878275570583263e-05, "loss": 1.6098, "step": 681, "task_loss": 0.8986676931381226 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.408935546875, "epoch": 0.58, "learning_rate": 2.882502113271344e-05, "loss": 1.6122, "step": 682, "task_loss": 0.6317296624183655 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 2.0578150749206543, "epoch": 0.58, "learning_rate": 2.8867286559594254e-05, "loss": 1.4871, "step": 683, "task_loss": 1.127751111984253 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.006550908088684, "epoch": 0.58, "learning_rate": 2.8909551986475063e-05, "loss": 1.1262, "step": 684, "task_loss": 1.1229885816574097 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.8016066551208496, "epoch": 0.58, "learning_rate": 2.8951817413355876e-05, "loss": 1.368, "step": 685, "task_loss": 1.1983249187469482 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 2.1605045795440674, "epoch": 0.58, "learning_rate": 2.8994082840236685e-05, "loss": 1.494, "step": 686, "task_loss": 0.6800357699394226 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.5272266864776611, "epoch": 0.58, "learning_rate": 2.9036348267117498e-05, "loss": 1.3462, "step": 687, "task_loss": 1.8150286674499512 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.0030484199523926, "epoch": 0.58, "learning_rate": 2.9078613693998314e-05, "loss": 1.1387, "step": 688, "task_loss": 0.8048574924468994 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.40828275680542, "epoch": 0.58, "learning_rate": 2.9120879120879123e-05, "loss": 1.3236, "step": 689, "task_loss": 0.5279685258865356 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.1242307424545288, "epoch": 0.58, "learning_rate": 2.9163144547759936e-05, "loss": 1.4809, "step": 690, "task_loss": 0.4531760513782501 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.8669126033782959, "epoch": 0.58, "learning_rate": 2.9205409974640746e-05, "loss": 1.2519, "step": 691, "task_loss": 0.3343375623226166 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.5432629585266113, "epoch": 0.58, "learning_rate": 2.924767540152156e-05, "loss": 1.5538, "step": 692, "task_loss": 1.3851969242095947 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.4076192378997803, "epoch": 0.59, "learning_rate": 2.9289940828402368e-05, "loss": 1.4887, "step": 693, "task_loss": 1.8642549514770508 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 2.7077107429504395, "epoch": 0.59, "learning_rate": 2.933220625528318e-05, "loss": 1.51, "step": 694, "task_loss": 1.6698142290115356 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.5578193664550781, "epoch": 0.59, "learning_rate": 2.937447168216399e-05, "loss": 1.4737, "step": 695, "task_loss": 1.489617943763733 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.1586830615997314, "epoch": 0.59, "learning_rate": 2.94167371090448e-05, "loss": 1.2867, "step": 696, "task_loss": 0.6294510364532471 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.1899855136871338, "epoch": 0.59, "learning_rate": 2.9459002535925612e-05, "loss": 1.1781, "step": 697, "task_loss": 0.8063412308692932 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.8221255540847778, "epoch": 0.59, "learning_rate": 2.9501267962806428e-05, "loss": 1.4055, "step": 698, "task_loss": 0.6587286591529846 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.3647469282150269, "epoch": 0.59, "learning_rate": 2.9543533389687238e-05, "loss": 1.3671, "step": 699, "task_loss": 0.5070088505744934 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.3376853466033936, "epoch": 0.59, "learning_rate": 2.958579881656805e-05, "loss": 1.4248, "step": 700, "task_loss": 0.905743420124054 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.329820990562439, "epoch": 0.59, "learning_rate": 2.962806424344886e-05, "loss": 1.5463, "step": 701, "task_loss": 0.8405337929725647 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.7667867541313171, "epoch": 0.59, "learning_rate": 2.9670329670329673e-05, "loss": 1.0905, "step": 702, "task_loss": 0.7684465646743774 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.4072433710098267, "epoch": 0.59, "learning_rate": 2.9712595097210482e-05, "loss": 1.3943, "step": 703, "task_loss": 1.0100133419036865 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.9178508520126343, "epoch": 0.59, "learning_rate": 2.9754860524091295e-05, "loss": 1.786, "step": 704, "task_loss": 1.9493777751922607 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.9487297534942627, "epoch": 0.6, "learning_rate": 2.9797125950972104e-05, "loss": 1.4663, "step": 705, "task_loss": 2.259042263031006 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.528301477432251, "epoch": 0.6, "learning_rate": 2.9839391377852917e-05, "loss": 1.1631, "step": 706, "task_loss": 0.28674349188804626 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.3450632095336914, "epoch": 0.6, "learning_rate": 2.9881656804733733e-05, "loss": 1.4949, "step": 707, "task_loss": 1.143014907836914 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.409010887145996, "epoch": 0.6, "learning_rate": 2.9923922231614543e-05, "loss": 1.3435, "step": 708, "task_loss": 1.0360783338546753 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.161576747894287, "epoch": 0.6, "learning_rate": 2.9966187658495355e-05, "loss": 1.7862, "step": 709, "task_loss": 0.5741552710533142 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.795288622379303, "epoch": 0.6, "learning_rate": 3.0008453085376165e-05, "loss": 1.4654, "step": 710, "task_loss": 1.225522518157959 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.9617218971252441, "epoch": 0.6, "learning_rate": 3.0050718512256974e-05, "loss": 1.4648, "step": 711, "task_loss": 0.9943562746047974 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.620964765548706, "epoch": 0.6, "learning_rate": 3.0092983939137787e-05, "loss": 1.6005, "step": 712, "task_loss": 1.2757292985916138 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.2072701454162598, "epoch": 0.6, "learning_rate": 3.0135249366018596e-05, "loss": 1.0605, "step": 713, "task_loss": 0.8487045168876648 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.8367528915405273, "epoch": 0.6, "learning_rate": 3.017751479289941e-05, "loss": 1.8295, "step": 714, "task_loss": 0.8496079444885254 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.1372811794281006, "epoch": 0.6, "learning_rate": 3.021978021978022e-05, "loss": 1.1556, "step": 715, "task_loss": 0.845334529876709 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.0037188529968262, "epoch": 0.6, "learning_rate": 3.026204564666103e-05, "loss": 1.0427, "step": 716, "task_loss": 1.1058974266052246 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.3244569301605225, "epoch": 0.61, "learning_rate": 3.0304311073541847e-05, "loss": 1.3545, "step": 717, "task_loss": 1.2813187837600708 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.1645981073379517, "epoch": 0.61, "learning_rate": 3.0346576500422657e-05, "loss": 1.2698, "step": 718, "task_loss": 0.7977056503295898 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.3454211950302124, "epoch": 0.61, "learning_rate": 3.038884192730347e-05, "loss": 1.3901, "step": 719, "task_loss": 1.0199166536331177 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.060537338256836, "epoch": 0.61, "learning_rate": 3.043110735418428e-05, "loss": 1.5656, "step": 720, "task_loss": 1.0599092245101929 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.6241382360458374, "epoch": 0.61, "learning_rate": 3.047337278106509e-05, "loss": 1.2238, "step": 721, "task_loss": 2.105393171310425 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.5769617557525635, "epoch": 0.61, "learning_rate": 3.05156382079459e-05, "loss": 1.1988, "step": 722, "task_loss": 0.9134297370910645 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.0414657592773438, "epoch": 0.61, "learning_rate": 3.0557903634826714e-05, "loss": 1.5899, "step": 723, "task_loss": 0.6078497171401978 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.5410141944885254, "epoch": 0.61, "learning_rate": 3.060016906170752e-05, "loss": 1.3623, "step": 724, "task_loss": 1.8485307693481445 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.8764464855194092, "epoch": 0.61, "learning_rate": 3.064243448858833e-05, "loss": 1.3635, "step": 725, "task_loss": 1.9136000871658325 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.1411190032958984, "epoch": 0.61, "learning_rate": 3.068469991546914e-05, "loss": 1.4542, "step": 726, "task_loss": 0.7353646159172058 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.1371047496795654, "epoch": 0.61, "learning_rate": 3.072696534234996e-05, "loss": 1.1312, "step": 727, "task_loss": 1.0274072885513306 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.9875434041023254, "epoch": 0.61, "learning_rate": 3.0769230769230774e-05, "loss": 1.1656, "step": 728, "task_loss": 0.7100459933280945 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.0378708839416504, "epoch": 0.62, "learning_rate": 3.0811496196111584e-05, "loss": 1.4261, "step": 729, "task_loss": 1.214665412902832 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.5776360034942627, "epoch": 0.62, "learning_rate": 3.085376162299239e-05, "loss": 1.4927, "step": 730, "task_loss": 1.2391765117645264 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.9539617300033569, "epoch": 0.62, "learning_rate": 3.08960270498732e-05, "loss": 1.1092, "step": 731, "task_loss": 1.031744122505188 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.7882373332977295, "epoch": 0.62, "learning_rate": 3.093829247675402e-05, "loss": 1.2236, "step": 732, "task_loss": 1.0188167095184326 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.8838756084442139, "epoch": 0.62, "learning_rate": 3.098055790363483e-05, "loss": 1.3281, "step": 733, "task_loss": 1.997550368309021 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.2788608074188232, "epoch": 0.62, "learning_rate": 3.102282333051564e-05, "loss": 1.3928, "step": 734, "task_loss": 0.8079401850700378 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.7655508518218994, "epoch": 0.62, "learning_rate": 3.106508875739645e-05, "loss": 1.3034, "step": 735, "task_loss": 0.6227017045021057 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.4453158378601074, "epoch": 0.62, "learning_rate": 3.110735418427726e-05, "loss": 1.3911, "step": 736, "task_loss": 0.9161060452461243 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.3663349151611328, "epoch": 0.62, "learning_rate": 3.114961961115808e-05, "loss": 0.9101, "step": 737, "task_loss": 0.842225193977356 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.5438990592956543, "epoch": 0.62, "learning_rate": 3.119188503803889e-05, "loss": 1.5341, "step": 738, "task_loss": 0.47739672660827637 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.2349551916122437, "epoch": 0.62, "learning_rate": 3.12341504649197e-05, "loss": 1.396, "step": 739, "task_loss": 0.9894749522209167 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.0047895908355713, "epoch": 0.63, "learning_rate": 3.127641589180051e-05, "loss": 0.8866, "step": 740, "task_loss": 0.9324040412902832 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.4260210990905762, "epoch": 0.63, "learning_rate": 3.131868131868132e-05, "loss": 1.2471, "step": 741, "task_loss": 0.3376115560531616 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.7668278217315674, "epoch": 0.63, "learning_rate": 3.136094674556213e-05, "loss": 1.3282, "step": 742, "task_loss": 1.7642693519592285 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.4765636920928955, "epoch": 0.63, "learning_rate": 3.140321217244294e-05, "loss": 1.3615, "step": 743, "task_loss": 1.8432987928390503 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.4979791641235352, "epoch": 0.63, "learning_rate": 3.144547759932375e-05, "loss": 1.2393, "step": 744, "task_loss": 1.9573218822479248 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.3229753971099854, "epoch": 0.63, "learning_rate": 3.148774302620456e-05, "loss": 1.2009, "step": 745, "task_loss": 0.6425274610519409 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.9752696752548218, "epoch": 0.63, "learning_rate": 3.153000845308538e-05, "loss": 1.2667, "step": 746, "task_loss": 1.6002529859542847 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6821339726448059, "epoch": 0.63, "learning_rate": 3.1572273879966193e-05, "loss": 1.0693, "step": 747, "task_loss": 0.3564088046550751 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.3711963891983032, "epoch": 0.63, "learning_rate": 3.1614539306847e-05, "loss": 1.3266, "step": 748, "task_loss": 1.7210822105407715 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.7442446351051331, "epoch": 0.63, "learning_rate": 3.165680473372781e-05, "loss": 1.0009, "step": 749, "task_loss": 0.538000226020813 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.6805883646011353, "epoch": 0.63, "learning_rate": 3.169907016060862e-05, "loss": 1.4701, "step": 750, "task_loss": 0.820415735244751 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.4134013652801514, "epoch": 0.63, "learning_rate": 3.174133558748944e-05, "loss": 1.1963, "step": 751, "task_loss": 1.3457452058792114 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.8930699825286865, "epoch": 0.64, "learning_rate": 3.178360101437025e-05, "loss": 1.2316, "step": 752, "task_loss": 1.210572361946106 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.1586639881134033, "epoch": 0.64, "learning_rate": 3.1825866441251057e-05, "loss": 1.1674, "step": 753, "task_loss": 1.4667516946792603 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.9502545595169067, "epoch": 0.64, "learning_rate": 3.1868131868131866e-05, "loss": 0.8921, "step": 754, "task_loss": 0.8325430750846863 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.9357709884643555, "epoch": 0.64, "learning_rate": 3.1910397295012675e-05, "loss": 0.9295, "step": 755, "task_loss": 0.762615442276001 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.7579373717308044, "epoch": 0.64, "learning_rate": 3.195266272189349e-05, "loss": 1.0436, "step": 756, "task_loss": 0.7855313420295715 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.9126052856445312, "epoch": 0.64, "learning_rate": 3.199492814877431e-05, "loss": 1.551, "step": 757, "task_loss": 1.4528396129608154 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.0547230243682861, "epoch": 0.64, "learning_rate": 3.203719357565512e-05, "loss": 1.0869, "step": 758, "task_loss": 0.6683381795883179 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.0525503158569336, "epoch": 0.64, "learning_rate": 3.2079459002535926e-05, "loss": 0.9953, "step": 759, "task_loss": 0.18759943544864655 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.8616998195648193, "epoch": 0.64, "learning_rate": 3.2121724429416736e-05, "loss": 1.3584, "step": 760, "task_loss": 0.9390403032302856 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.9221686720848083, "epoch": 0.64, "learning_rate": 3.216398985629755e-05, "loss": 1.3151, "step": 761, "task_loss": 0.7151225805282593 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.3533369302749634, "epoch": 0.64, "learning_rate": 3.220625528317836e-05, "loss": 0.9055, "step": 762, "task_loss": 1.5168225765228271 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.100745677947998, "epoch": 0.64, "learning_rate": 3.224852071005917e-05, "loss": 1.2536, "step": 763, "task_loss": 0.38282209634780884 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.0497146844863892, "epoch": 0.65, "learning_rate": 3.229078613693998e-05, "loss": 1.2617, "step": 764, "task_loss": 0.8950813412666321 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.9626649618148804, "epoch": 0.65, "learning_rate": 3.2333051563820796e-05, "loss": 1.4889, "step": 765, "task_loss": 0.6804001927375793 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.1379417181015015, "epoch": 0.65, "learning_rate": 3.237531699070161e-05, "loss": 1.2559, "step": 766, "task_loss": 0.870660662651062 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.016981840133667, "epoch": 0.65, "learning_rate": 3.241758241758242e-05, "loss": 1.4095, "step": 767, "task_loss": 1.0748240947723389 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.8861956000328064, "epoch": 0.65, "learning_rate": 3.245984784446323e-05, "loss": 1.2675, "step": 768, "task_loss": 0.615392804145813 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.8045220375061035, "epoch": 0.65, "learning_rate": 3.250211327134404e-05, "loss": 0.9897, "step": 769, "task_loss": 1.6343724727630615 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.56914484500885, "epoch": 0.65, "learning_rate": 3.254437869822485e-05, "loss": 1.184, "step": 770, "task_loss": 1.6107184886932373 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.2962971925735474, "epoch": 0.65, "learning_rate": 3.2586644125105666e-05, "loss": 1.3232, "step": 771, "task_loss": 0.7560681104660034 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.2538061141967773, "epoch": 0.65, "learning_rate": 3.2628909551986476e-05, "loss": 1.0573, "step": 772, "task_loss": 1.1959505081176758 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.0524084568023682, "epoch": 0.65, "learning_rate": 3.2671174978867285e-05, "loss": 1.4628, "step": 773, "task_loss": 0.5565171837806702 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.1373214721679688, "epoch": 0.65, "learning_rate": 3.2713440405748094e-05, "loss": 1.123, "step": 774, "task_loss": 1.6657607555389404 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.0028746128082275, "epoch": 0.65, "learning_rate": 3.275570583262891e-05, "loss": 1.0034, "step": 775, "task_loss": 0.790636420249939 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.6192691326141357, "epoch": 0.66, "learning_rate": 3.279797125950973e-05, "loss": 1.3584, "step": 776, "task_loss": 1.3156646490097046 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.8031277656555176, "epoch": 0.66, "learning_rate": 3.2840236686390536e-05, "loss": 0.9986, "step": 777, "task_loss": 0.15089592337608337 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.4036678075790405, "epoch": 0.66, "learning_rate": 3.2882502113271346e-05, "loss": 0.9864, "step": 778, "task_loss": 1.3838911056518555 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.6737282276153564, "epoch": 0.66, "learning_rate": 3.2924767540152155e-05, "loss": 1.0778, "step": 779, "task_loss": 0.986405611038208 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.1664226055145264, "epoch": 0.66, "learning_rate": 3.296703296703297e-05, "loss": 1.1947, "step": 780, "task_loss": 0.6116193532943726 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.7758909463882446, "epoch": 0.66, "learning_rate": 3.300929839391378e-05, "loss": 1.0504, "step": 781, "task_loss": 0.7467387318611145 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.0047651529312134, "epoch": 0.66, "learning_rate": 3.305156382079459e-05, "loss": 1.0698, "step": 782, "task_loss": 1.6976722478866577 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.9224201440811157, "epoch": 0.66, "learning_rate": 3.30938292476754e-05, "loss": 0.9493, "step": 783, "task_loss": 0.40012145042419434 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.945979118347168, "epoch": 0.66, "learning_rate": 3.3136094674556215e-05, "loss": 1.2699, "step": 784, "task_loss": 0.8882613778114319 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.4105168581008911, "epoch": 0.66, "learning_rate": 3.317836010143703e-05, "loss": 1.0217, "step": 785, "task_loss": 1.1323537826538086 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.0683871507644653, "epoch": 0.66, "learning_rate": 3.322062552831784e-05, "loss": 1.206, "step": 786, "task_loss": 0.3326896131038666 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.7210911512374878, "epoch": 0.66, "learning_rate": 3.326289095519865e-05, "loss": 1.1225, "step": 787, "task_loss": 0.579444408416748 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.0139968395233154, "epoch": 0.67, "learning_rate": 3.330515638207946e-05, "loss": 1.2026, "step": 788, "task_loss": 0.6448048949241638 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.266096830368042, "epoch": 0.67, "learning_rate": 3.334742180896027e-05, "loss": 1.1261, "step": 789, "task_loss": 0.6363561153411865 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.163905382156372, "epoch": 0.67, "learning_rate": 3.3389687235841085e-05, "loss": 1.2274, "step": 790, "task_loss": 1.3863892555236816 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.2620033025741577, "epoch": 0.67, "learning_rate": 3.3431952662721895e-05, "loss": 1.4208, "step": 791, "task_loss": 0.47367507219314575 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.8582741022109985, "epoch": 0.67, "learning_rate": 3.3474218089602704e-05, "loss": 1.1125, "step": 792, "task_loss": 1.041414737701416 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.0617467164993286, "epoch": 0.67, "learning_rate": 3.3516483516483513e-05, "loss": 1.1794, "step": 793, "task_loss": 0.8185568451881409 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.4089443683624268, "epoch": 0.67, "learning_rate": 3.355874894336433e-05, "loss": 1.0588, "step": 794, "task_loss": 1.1982653141021729 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.7130084037780762, "epoch": 0.67, "learning_rate": 3.3601014370245146e-05, "loss": 1.2627, "step": 795, "task_loss": 0.8388482332229614 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.3874064683914185, "epoch": 0.67, "learning_rate": 3.3643279797125955e-05, "loss": 1.1283, "step": 796, "task_loss": 0.9146378636360168 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.582143783569336, "epoch": 0.67, "learning_rate": 3.3685545224006765e-05, "loss": 1.2534, "step": 797, "task_loss": 1.2254849672317505 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.9507005214691162, "epoch": 0.67, "learning_rate": 3.3727810650887574e-05, "loss": 1.2953, "step": 798, "task_loss": 2.448974609375 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6781567931175232, "epoch": 0.67, "learning_rate": 3.377007607776838e-05, "loss": 1.1329, "step": 799, "task_loss": 0.5950313210487366 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.929793119430542, "epoch": 0.68, "learning_rate": 3.38123415046492e-05, "loss": 1.2051, "step": 800, "task_loss": 0.6294661164283752 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6224916577339172, "epoch": 0.68, "learning_rate": 3.385460693153001e-05, "loss": 0.9265, "step": 801, "task_loss": 0.24103760719299316 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.9142379760742188, "epoch": 0.68, "learning_rate": 3.389687235841082e-05, "loss": 1.3758, "step": 802, "task_loss": 1.322835087776184 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.315333604812622, "epoch": 0.68, "learning_rate": 3.393913778529163e-05, "loss": 1.4024, "step": 803, "task_loss": 1.4778238534927368 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.067352533340454, "epoch": 0.68, "learning_rate": 3.3981403212172444e-05, "loss": 0.8641, "step": 804, "task_loss": 1.5639110803604126 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.243212103843689, "epoch": 0.68, "learning_rate": 3.402366863905326e-05, "loss": 1.1398, "step": 805, "task_loss": 0.8332517743110657 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.7636231184005737, "epoch": 0.68, "learning_rate": 3.406593406593407e-05, "loss": 1.3156, "step": 806, "task_loss": 1.280535101890564 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.323514699935913, "epoch": 0.68, "learning_rate": 3.410819949281488e-05, "loss": 1.024, "step": 807, "task_loss": 0.9092371463775635 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.464396357536316, "epoch": 0.68, "learning_rate": 3.415046491969569e-05, "loss": 1.3036, "step": 808, "task_loss": 1.3983303308486938 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.9059646129608154, "epoch": 0.68, "learning_rate": 3.4192730346576504e-05, "loss": 1.0737, "step": 809, "task_loss": 1.2400716543197632 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.2472960948944092, "epoch": 0.68, "learning_rate": 3.4234995773457314e-05, "loss": 1.1997, "step": 810, "task_loss": 1.3540829420089722 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.8057377338409424, "epoch": 0.69, "learning_rate": 3.427726120033812e-05, "loss": 0.7798, "step": 811, "task_loss": 0.2278326153755188 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.0885618925094604, "epoch": 0.69, "learning_rate": 3.431952662721893e-05, "loss": 1.1224, "step": 812, "task_loss": 0.5487210154533386 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.021493673324585, "epoch": 0.69, "learning_rate": 3.436179205409975e-05, "loss": 0.9995, "step": 813, "task_loss": 0.2578725218772888 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.0298852920532227, "epoch": 0.69, "learning_rate": 3.4404057480980565e-05, "loss": 0.9818, "step": 814, "task_loss": 1.0327537059783936 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.2061482667922974, "epoch": 0.69, "learning_rate": 3.4446322907861374e-05, "loss": 1.2802, "step": 815, "task_loss": 0.5380061268806458 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5140435695648193, "epoch": 0.69, "learning_rate": 3.4488588334742184e-05, "loss": 0.8722, "step": 816, "task_loss": 0.31900647282600403 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.659974217414856, "epoch": 0.69, "learning_rate": 3.453085376162299e-05, "loss": 0.8583, "step": 817, "task_loss": 0.7299270033836365 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 2.0004239082336426, "epoch": 0.69, "learning_rate": 3.45731191885038e-05, "loss": 1.4279, "step": 818, "task_loss": 0.9796584844589233 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.1866278648376465, "epoch": 0.69, "learning_rate": 3.461538461538462e-05, "loss": 1.0565, "step": 819, "task_loss": 1.3525701761245728 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.8020912408828735, "epoch": 0.69, "learning_rate": 3.465765004226543e-05, "loss": 1.4161, "step": 820, "task_loss": 1.020184874534607 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.7706226110458374, "epoch": 0.69, "learning_rate": 3.469991546914624e-05, "loss": 0.9847, "step": 821, "task_loss": 0.7895649075508118 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6366397738456726, "epoch": 0.69, "learning_rate": 3.474218089602705e-05, "loss": 0.98, "step": 822, "task_loss": 1.0765902996063232 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.8741238117218018, "epoch": 0.7, "learning_rate": 3.478444632290786e-05, "loss": 0.9404, "step": 823, "task_loss": 0.5794886946678162 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.686323881149292, "epoch": 0.7, "learning_rate": 3.482671174978868e-05, "loss": 1.2467, "step": 824, "task_loss": 0.9441559314727783 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.8098551034927368, "epoch": 0.7, "learning_rate": 3.486897717666949e-05, "loss": 0.8981, "step": 825, "task_loss": 0.3559379577636719 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.7981609106063843, "epoch": 0.7, "learning_rate": 3.49112426035503e-05, "loss": 0.9523, "step": 826, "task_loss": 0.6286649107933044 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.2563947439193726, "epoch": 0.7, "learning_rate": 3.495350803043111e-05, "loss": 1.0617, "step": 827, "task_loss": 1.103269100189209 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.69051194190979, "epoch": 0.7, "learning_rate": 3.499577345731192e-05, "loss": 1.212, "step": 828, "task_loss": 0.49598947167396545 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.8391950726509094, "epoch": 0.7, "learning_rate": 3.503803888419273e-05, "loss": 0.9746, "step": 829, "task_loss": 0.7219027280807495 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.4685629606246948, "epoch": 0.7, "learning_rate": 3.508030431107354e-05, "loss": 1.1247, "step": 830, "task_loss": 1.080963134765625 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.1293282508850098, "epoch": 0.7, "learning_rate": 3.512256973795435e-05, "loss": 1.0864, "step": 831, "task_loss": 1.5669431686401367 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.250570297241211, "epoch": 0.7, "learning_rate": 3.516483516483517e-05, "loss": 0.9911, "step": 832, "task_loss": 1.3642091751098633 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.125456690788269, "epoch": 0.7, "learning_rate": 3.520710059171598e-05, "loss": 1.1822, "step": 833, "task_loss": 2.5408148765563965 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.8799680471420288, "epoch": 0.7, "learning_rate": 3.524936601859679e-05, "loss": 1.1337, "step": 834, "task_loss": 1.5254669189453125 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.424616813659668, "epoch": 0.71, "learning_rate": 3.52916314454776e-05, "loss": 1.1357, "step": 835, "task_loss": 1.3318520784378052 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.3314528465270996, "epoch": 0.71, "learning_rate": 3.533389687235841e-05, "loss": 1.002, "step": 836, "task_loss": 1.339030146598816 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.7002354860305786, "epoch": 0.71, "learning_rate": 3.537616229923922e-05, "loss": 1.1341, "step": 837, "task_loss": 0.7738394737243652 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.8383018374443054, "epoch": 0.71, "learning_rate": 3.541842772612004e-05, "loss": 1.1663, "step": 838, "task_loss": 0.9395821690559387 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.2065908908843994, "epoch": 0.71, "learning_rate": 3.546069315300085e-05, "loss": 1.1805, "step": 839, "task_loss": 0.7872021198272705 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.7227573394775391, "epoch": 0.71, "learning_rate": 3.5502958579881656e-05, "loss": 0.8076, "step": 840, "task_loss": 1.3456993103027344 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.201753854751587, "epoch": 0.71, "learning_rate": 3.5545224006762466e-05, "loss": 1.2693, "step": 841, "task_loss": 0.9918860793113708 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.8004281520843506, "epoch": 0.71, "learning_rate": 3.558748943364328e-05, "loss": 1.2814, "step": 842, "task_loss": 1.1303761005401611 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5839501619338989, "epoch": 0.71, "learning_rate": 3.56297548605241e-05, "loss": 1.0925, "step": 843, "task_loss": 0.3605928421020508 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.9251384139060974, "epoch": 0.71, "learning_rate": 3.567202028740491e-05, "loss": 1.074, "step": 844, "task_loss": 0.2997857332229614 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.5477241277694702, "epoch": 0.71, "learning_rate": 3.571428571428572e-05, "loss": 1.2679, "step": 845, "task_loss": 0.6827241778373718 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.4141407012939453, "epoch": 0.71, "learning_rate": 3.5756551141166526e-05, "loss": 1.1589, "step": 846, "task_loss": 0.8392406105995178 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.366410255432129, "epoch": 0.72, "learning_rate": 3.5798816568047336e-05, "loss": 1.0562, "step": 847, "task_loss": 1.897608757019043 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.123450756072998, "epoch": 0.72, "learning_rate": 3.584108199492815e-05, "loss": 1.1623, "step": 848, "task_loss": 1.5228495597839355 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.6552289724349976, "epoch": 0.72, "learning_rate": 3.588334742180896e-05, "loss": 1.0923, "step": 849, "task_loss": 0.8985643982887268 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6603604555130005, "epoch": 0.72, "learning_rate": 3.592561284868977e-05, "loss": 0.8798, "step": 850, "task_loss": 1.2749196290969849 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.0338457822799683, "epoch": 0.72, "learning_rate": 3.596787827557058e-05, "loss": 1.1585, "step": 851, "task_loss": 1.3262828588485718 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.2368450164794922, "epoch": 0.72, "learning_rate": 3.6010143702451396e-05, "loss": 1.0772, "step": 852, "task_loss": 1.0315959453582764 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.005903720855713, "epoch": 0.72, "learning_rate": 3.605240912933221e-05, "loss": 1.2439, "step": 853, "task_loss": 0.48447489738464355 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.346367359161377, "epoch": 0.72, "learning_rate": 3.609467455621302e-05, "loss": 0.8882, "step": 854, "task_loss": 0.4428337514400482 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.336367130279541, "epoch": 0.72, "learning_rate": 3.613693998309383e-05, "loss": 1.0771, "step": 855, "task_loss": 1.4890811443328857 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.063593864440918, "epoch": 0.72, "learning_rate": 3.617920540997464e-05, "loss": 0.9776, "step": 856, "task_loss": 0.37484872341156006 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.009964108467102, "epoch": 0.72, "learning_rate": 3.622147083685546e-05, "loss": 0.9593, "step": 857, "task_loss": 0.6104087829589844 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.0258214473724365, "epoch": 0.72, "learning_rate": 3.6263736263736266e-05, "loss": 1.1007, "step": 858, "task_loss": 1.0949828624725342 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.2210016250610352, "epoch": 0.73, "learning_rate": 3.6306001690617076e-05, "loss": 1.0817, "step": 859, "task_loss": 1.6341801881790161 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.1012241840362549, "epoch": 0.73, "learning_rate": 3.6348267117497885e-05, "loss": 0.7995, "step": 860, "task_loss": 0.48184433579444885 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.4935442209243774, "epoch": 0.73, "learning_rate": 3.63905325443787e-05, "loss": 1.2728, "step": 861, "task_loss": 0.46124008297920227 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.7171981930732727, "epoch": 0.73, "learning_rate": 3.643279797125951e-05, "loss": 1.016, "step": 862, "task_loss": 1.1392186880111694 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.4614613056182861, "epoch": 0.73, "learning_rate": 3.647506339814033e-05, "loss": 1.0917, "step": 863, "task_loss": 1.782379150390625 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.0746396780014038, "epoch": 0.73, "learning_rate": 3.6517328825021136e-05, "loss": 0.9524, "step": 864, "task_loss": 0.4085269570350647 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.9016843438148499, "epoch": 0.73, "learning_rate": 3.6559594251901945e-05, "loss": 0.9237, "step": 865, "task_loss": 1.166468620300293 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4837757349014282, "epoch": 0.73, "learning_rate": 3.6601859678782755e-05, "loss": 0.769, "step": 866, "task_loss": 0.3351168632507324 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.8171372413635254, "epoch": 0.73, "learning_rate": 3.664412510566357e-05, "loss": 0.8371, "step": 867, "task_loss": 1.5543311834335327 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.1961016654968262, "epoch": 0.73, "learning_rate": 3.668639053254438e-05, "loss": 1.2246, "step": 868, "task_loss": 0.3919358253479004 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.7490168809890747, "epoch": 0.73, "learning_rate": 3.672865595942519e-05, "loss": 1.3098, "step": 869, "task_loss": 0.8026441931724548 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.9559720754623413, "epoch": 0.73, "learning_rate": 3.6770921386306e-05, "loss": 1.0877, "step": 870, "task_loss": 0.7977285385131836 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.8871562480926514, "epoch": 0.74, "learning_rate": 3.6813186813186815e-05, "loss": 0.9033, "step": 871, "task_loss": 1.9800527095794678 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.6465833187103271, "epoch": 0.74, "learning_rate": 3.685545224006763e-05, "loss": 1.0784, "step": 872, "task_loss": 1.458868145942688 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.9516457319259644, "epoch": 0.74, "learning_rate": 3.689771766694844e-05, "loss": 0.9062, "step": 873, "task_loss": 0.631178617477417 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.7906115651130676, "epoch": 0.74, "learning_rate": 3.693998309382925e-05, "loss": 0.9737, "step": 874, "task_loss": 0.678638756275177 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.786811351776123, "epoch": 0.74, "learning_rate": 3.698224852071006e-05, "loss": 0.8342, "step": 875, "task_loss": 0.5169845819473267 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.9709447622299194, "epoch": 0.74, "learning_rate": 3.702451394759087e-05, "loss": 0.9206, "step": 876, "task_loss": 0.7110479474067688 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.1406593322753906, "epoch": 0.74, "learning_rate": 3.7066779374471685e-05, "loss": 1.0922, "step": 877, "task_loss": 1.4186698198318481 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.0560839176177979, "epoch": 0.74, "learning_rate": 3.7109044801352495e-05, "loss": 0.995, "step": 878, "task_loss": 0.876980185508728 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.1954360008239746, "epoch": 0.74, "learning_rate": 3.7151310228233304e-05, "loss": 1.2067, "step": 879, "task_loss": 0.7943779230117798 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.3926739692687988, "epoch": 0.74, "learning_rate": 3.7193575655114113e-05, "loss": 1.2232, "step": 880, "task_loss": 1.330909013748169 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.8508328795433044, "epoch": 0.74, "learning_rate": 3.723584108199493e-05, "loss": 0.8478, "step": 881, "task_loss": 0.2635786831378937 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.735930323600769, "epoch": 0.75, "learning_rate": 3.7278106508875746e-05, "loss": 1.1152, "step": 882, "task_loss": 0.3356916010379791 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.267694354057312, "epoch": 0.75, "learning_rate": 3.7320371935756555e-05, "loss": 1.004, "step": 883, "task_loss": 1.1062166690826416 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.9897673726081848, "epoch": 0.75, "learning_rate": 3.7362637362637365e-05, "loss": 1.0358, "step": 884, "task_loss": 0.405316561460495 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.065746784210205, "epoch": 0.75, "learning_rate": 3.7404902789518174e-05, "loss": 0.8564, "step": 885, "task_loss": 1.871180534362793 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.9644671082496643, "epoch": 0.75, "learning_rate": 3.744716821639899e-05, "loss": 0.8845, "step": 886, "task_loss": 0.7812583446502686 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5859856009483337, "epoch": 0.75, "learning_rate": 3.74894336432798e-05, "loss": 0.9276, "step": 887, "task_loss": 0.9026466012001038 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.0855333805084229, "epoch": 0.75, "learning_rate": 3.753169907016061e-05, "loss": 0.8677, "step": 888, "task_loss": 0.5261902809143066 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.8959407806396484, "epoch": 0.75, "learning_rate": 3.757396449704142e-05, "loss": 0.8746, "step": 889, "task_loss": 0.6883710026741028 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.2657511234283447, "epoch": 0.75, "learning_rate": 3.7616229923922234e-05, "loss": 1.095, "step": 890, "task_loss": 0.5865076780319214 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.5554494857788086, "epoch": 0.75, "learning_rate": 3.7658495350803044e-05, "loss": 1.1688, "step": 891, "task_loss": 2.1562516689300537 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.9893829822540283, "epoch": 0.75, "learning_rate": 3.770076077768386e-05, "loss": 0.9013, "step": 892, "task_loss": 1.5681660175323486 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6857393980026245, "epoch": 0.75, "learning_rate": 3.774302620456467e-05, "loss": 1.0543, "step": 893, "task_loss": 0.49078652262687683 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.0735504627227783, "epoch": 0.76, "learning_rate": 3.778529163144548e-05, "loss": 1.0778, "step": 894, "task_loss": 1.7317439317703247 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.105076789855957, "epoch": 0.76, "learning_rate": 3.782755705832629e-05, "loss": 0.9484, "step": 895, "task_loss": 0.2082778513431549 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.031673789024353, "epoch": 0.76, "learning_rate": 3.7869822485207104e-05, "loss": 1.1305, "step": 896, "task_loss": 0.7247858047485352 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.7948950529098511, "epoch": 0.76, "learning_rate": 3.7912087912087914e-05, "loss": 1.1038, "step": 897, "task_loss": 1.256655216217041 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.878950834274292, "epoch": 0.76, "learning_rate": 3.795435333896872e-05, "loss": 0.8788, "step": 898, "task_loss": 0.3764266073703766 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.7968717217445374, "epoch": 0.76, "learning_rate": 3.799661876584953e-05, "loss": 0.9649, "step": 899, "task_loss": 1.6645703315734863 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.8570467233657837, "epoch": 0.76, "learning_rate": 3.803888419273035e-05, "loss": 0.8179, "step": 900, "task_loss": 0.5114364624023438 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.9215599298477173, "epoch": 0.76, "learning_rate": 3.8081149619611165e-05, "loss": 0.9906, "step": 901, "task_loss": 1.2084954977035522 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.0516382455825806, "epoch": 0.76, "learning_rate": 3.8123415046491974e-05, "loss": 0.8247, "step": 902, "task_loss": 0.9142789244651794 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.994260847568512, "epoch": 0.76, "learning_rate": 3.8165680473372784e-05, "loss": 0.9231, "step": 903, "task_loss": 1.0206170082092285 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.0775905847549438, "epoch": 0.76, "learning_rate": 3.820794590025359e-05, "loss": 1.1574, "step": 904, "task_loss": 1.0544328689575195 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.3944138288497925, "epoch": 0.76, "learning_rate": 3.82502113271344e-05, "loss": 0.9063, "step": 905, "task_loss": 1.109188437461853 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.9764843583106995, "epoch": 0.77, "learning_rate": 3.829247675401522e-05, "loss": 0.9735, "step": 906, "task_loss": 1.8123575448989868 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.017120122909546, "epoch": 0.77, "learning_rate": 3.833474218089603e-05, "loss": 1.1359, "step": 907, "task_loss": 0.9205818176269531 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.0457156896591187, "epoch": 0.77, "learning_rate": 3.837700760777684e-05, "loss": 0.8679, "step": 908, "task_loss": 2.0430691242218018 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.8560712337493896, "epoch": 0.77, "learning_rate": 3.8419273034657653e-05, "loss": 1.013, "step": 909, "task_loss": 0.6274436712265015 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.9065303206443787, "epoch": 0.77, "learning_rate": 3.846153846153846e-05, "loss": 1.0423, "step": 910, "task_loss": 0.7250082492828369 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.161895751953125, "epoch": 0.77, "learning_rate": 3.850380388841928e-05, "loss": 1.1945, "step": 911, "task_loss": 1.1996887922286987 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.2840684652328491, "epoch": 0.77, "learning_rate": 3.854606931530009e-05, "loss": 1.1767, "step": 912, "task_loss": 0.7904782295227051 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.3081060647964478, "epoch": 0.77, "learning_rate": 3.85883347421809e-05, "loss": 1.0609, "step": 913, "task_loss": 1.5624157190322876 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.021205186843872, "epoch": 0.77, "learning_rate": 3.863060016906171e-05, "loss": 1.0364, "step": 914, "task_loss": 1.558895468711853 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.0080324411392212, "epoch": 0.77, "learning_rate": 3.867286559594252e-05, "loss": 0.7807, "step": 915, "task_loss": 1.084423542022705 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6091734170913696, "epoch": 0.77, "learning_rate": 3.871513102282333e-05, "loss": 0.8504, "step": 916, "task_loss": 0.7098196744918823 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.1158721446990967, "epoch": 0.77, "learning_rate": 3.875739644970414e-05, "loss": 0.874, "step": 917, "task_loss": 1.6732234954833984 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.121085286140442, "epoch": 0.78, "learning_rate": 3.879966187658495e-05, "loss": 1.0027, "step": 918, "task_loss": 1.22336745262146 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5889465808868408, "epoch": 0.78, "learning_rate": 3.884192730346577e-05, "loss": 0.8872, "step": 919, "task_loss": 0.39150503277778625 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.6394715309143066, "epoch": 0.78, "learning_rate": 3.888419273034658e-05, "loss": 1.1139, "step": 920, "task_loss": 1.017325758934021 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.9628031253814697, "epoch": 0.78, "learning_rate": 3.892645815722739e-05, "loss": 1.0536, "step": 921, "task_loss": 1.054599642753601 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6038956046104431, "epoch": 0.78, "learning_rate": 3.89687235841082e-05, "loss": 0.8938, "step": 922, "task_loss": 0.16306006908416748 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.010401725769043, "epoch": 0.78, "learning_rate": 3.901098901098901e-05, "loss": 0.8673, "step": 923, "task_loss": 1.7269126176834106 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.8716728687286377, "epoch": 0.78, "learning_rate": 3.905325443786982e-05, "loss": 1.1106, "step": 924, "task_loss": 1.7616655826568604 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.9565305709838867, "epoch": 0.78, "learning_rate": 3.909551986475064e-05, "loss": 1.3859, "step": 925, "task_loss": 1.3713593482971191 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5824107527732849, "epoch": 0.78, "learning_rate": 3.913778529163145e-05, "loss": 0.724, "step": 926, "task_loss": 0.6481862664222717 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.4840768575668335, "epoch": 0.78, "learning_rate": 3.9180050718512256e-05, "loss": 0.8968, "step": 927, "task_loss": 0.5967175960540771 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.0531705617904663, "epoch": 0.78, "learning_rate": 3.9222316145393066e-05, "loss": 1.0526, "step": 928, "task_loss": 0.5348222255706787 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.0892040729522705, "epoch": 0.78, "learning_rate": 3.926458157227388e-05, "loss": 0.9979, "step": 929, "task_loss": 0.9812852740287781 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6578410863876343, "epoch": 0.79, "learning_rate": 3.93068469991547e-05, "loss": 0.983, "step": 930, "task_loss": 1.4472873210906982 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.7527291774749756, "epoch": 0.79, "learning_rate": 3.934911242603551e-05, "loss": 0.8333, "step": 931, "task_loss": 0.7980977892875671 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.47932684421539307, "epoch": 0.79, "learning_rate": 3.939137785291632e-05, "loss": 0.6831, "step": 932, "task_loss": 0.29740583896636963 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.3974848985671997, "epoch": 0.79, "learning_rate": 3.9433643279797126e-05, "loss": 1.1262, "step": 933, "task_loss": 1.995893955230713 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.8135479688644409, "epoch": 0.79, "learning_rate": 3.9475908706677936e-05, "loss": 0.8664, "step": 934, "task_loss": 1.0040594339370728 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.2828150987625122, "epoch": 0.79, "learning_rate": 3.951817413355875e-05, "loss": 1.0743, "step": 935, "task_loss": 0.8203858137130737 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.7958099842071533, "epoch": 0.79, "learning_rate": 3.956043956043956e-05, "loss": 0.739, "step": 936, "task_loss": 0.23765406012535095 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.710662841796875, "epoch": 0.79, "learning_rate": 3.960270498732037e-05, "loss": 0.9676, "step": 937, "task_loss": 0.5433305501937866 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.8049070835113525, "epoch": 0.79, "learning_rate": 3.964497041420119e-05, "loss": 0.8704, "step": 938, "task_loss": 0.8373986482620239 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.930149495601654, "epoch": 0.79, "learning_rate": 3.9687235841081996e-05, "loss": 0.8924, "step": 939, "task_loss": 0.4963827431201935 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.9112241864204407, "epoch": 0.79, "learning_rate": 3.972950126796281e-05, "loss": 1.0633, "step": 940, "task_loss": 1.2500132322311401 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5710884928703308, "epoch": 0.79, "learning_rate": 3.977176669484362e-05, "loss": 0.8388, "step": 941, "task_loss": 0.8553664088249207 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.7972924113273621, "epoch": 0.8, "learning_rate": 3.981403212172443e-05, "loss": 0.87, "step": 942, "task_loss": 0.7139917612075806 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.9264711141586304, "epoch": 0.8, "learning_rate": 3.985629754860524e-05, "loss": 1.009, "step": 943, "task_loss": 0.2500097453594208 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.2713966369628906, "epoch": 0.8, "learning_rate": 3.989856297548606e-05, "loss": 0.9165, "step": 944, "task_loss": 0.8897305727005005 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.965168297290802, "epoch": 0.8, "learning_rate": 3.9940828402366866e-05, "loss": 1.0089, "step": 945, "task_loss": 0.8689515590667725 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.0661712884902954, "epoch": 0.8, "learning_rate": 3.9983093829247675e-05, "loss": 0.7454, "step": 946, "task_loss": 0.8283068537712097 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.7944927215576172, "epoch": 0.8, "learning_rate": 4.0025359256128485e-05, "loss": 0.8275, "step": 947, "task_loss": 0.8048862218856812 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5694467425346375, "epoch": 0.8, "learning_rate": 4.00676246830093e-05, "loss": 0.8465, "step": 948, "task_loss": 1.6677870750427246 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.7159451842308044, "epoch": 0.8, "learning_rate": 4.010989010989011e-05, "loss": 0.9979, "step": 949, "task_loss": 0.721280574798584 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.1846003532409668, "epoch": 0.8, "learning_rate": 4.0152155536770927e-05, "loss": 0.9007, "step": 950, "task_loss": 0.31311145424842834 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.7975897192955017, "epoch": 0.8, "learning_rate": 4.0194420963651736e-05, "loss": 0.7006, "step": 951, "task_loss": 0.6168971657752991 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.1540274620056152, "epoch": 0.8, "learning_rate": 4.0236686390532545e-05, "loss": 1.0858, "step": 952, "task_loss": 1.2207467555999756 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5513806343078613, "epoch": 0.81, "learning_rate": 4.0278951817413355e-05, "loss": 0.6808, "step": 953, "task_loss": 0.6391775012016296 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.8955473899841309, "epoch": 0.81, "learning_rate": 4.032121724429417e-05, "loss": 0.9434, "step": 954, "task_loss": 1.462203025817871 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.7297910451889038, "epoch": 0.81, "learning_rate": 4.036348267117498e-05, "loss": 0.8184, "step": 955, "task_loss": 0.5925722122192383 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.8842399716377258, "epoch": 0.81, "learning_rate": 4.040574809805579e-05, "loss": 0.9015, "step": 956, "task_loss": 0.2413739711046219 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.1322941780090332, "epoch": 0.81, "learning_rate": 4.0448013524936606e-05, "loss": 0.9982, "step": 957, "task_loss": 1.266666054725647 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.9506944417953491, "epoch": 0.81, "learning_rate": 4.0490278951817415e-05, "loss": 0.8833, "step": 958, "task_loss": 1.259725570678711 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.7270945906639099, "epoch": 0.81, "learning_rate": 4.053254437869823e-05, "loss": 0.8987, "step": 959, "task_loss": 0.7694357633590698 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.8349928855895996, "epoch": 0.81, "learning_rate": 4.057480980557904e-05, "loss": 1.0189, "step": 960, "task_loss": 0.9906123876571655 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.977832019329071, "epoch": 0.81, "learning_rate": 4.061707523245985e-05, "loss": 0.8727, "step": 961, "task_loss": 0.6111451387405396 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.3641244173049927, "epoch": 0.81, "learning_rate": 4.065934065934066e-05, "loss": 1.0169, "step": 962, "task_loss": 1.0062824487686157 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.7471967935562134, "epoch": 0.81, "learning_rate": 4.070160608622147e-05, "loss": 1.068, "step": 963, "task_loss": 1.2559819221496582 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.9855952262878418, "epoch": 0.81, "learning_rate": 4.0743871513102285e-05, "loss": 0.9983, "step": 964, "task_loss": 0.24628500640392303 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.4411020278930664, "epoch": 0.82, "learning_rate": 4.0786136939983095e-05, "loss": 0.9164, "step": 965, "task_loss": 1.339667797088623 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.022794485092163, "epoch": 0.82, "learning_rate": 4.0828402366863904e-05, "loss": 0.823, "step": 966, "task_loss": 1.9382131099700928 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.9341446757316589, "epoch": 0.82, "learning_rate": 4.087066779374472e-05, "loss": 1.0929, "step": 967, "task_loss": 1.0955976247787476 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.8919326066970825, "epoch": 0.82, "learning_rate": 4.091293322062553e-05, "loss": 0.996, "step": 968, "task_loss": 0.5159933567047119 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.0778310298919678, "epoch": 0.82, "learning_rate": 4.0955198647506346e-05, "loss": 0.9628, "step": 969, "task_loss": 1.631089210510254 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.646014928817749, "epoch": 0.82, "learning_rate": 4.0997464074387155e-05, "loss": 0.7177, "step": 970, "task_loss": 0.848716139793396 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.737389087677002, "epoch": 0.82, "learning_rate": 4.1039729501267964e-05, "loss": 1.2569, "step": 971, "task_loss": 1.0369161367416382 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.7804964184761047, "epoch": 0.82, "learning_rate": 4.1081994928148774e-05, "loss": 0.7256, "step": 972, "task_loss": 0.5092301964759827 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.158642053604126, "epoch": 0.82, "learning_rate": 4.112426035502959e-05, "loss": 0.9623, "step": 973, "task_loss": 0.7963719367980957 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.00654137134552, "epoch": 0.82, "learning_rate": 4.11665257819104e-05, "loss": 0.8913, "step": 974, "task_loss": 0.6699679493904114 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6577127575874329, "epoch": 0.82, "learning_rate": 4.120879120879121e-05, "loss": 0.7402, "step": 975, "task_loss": 0.4661967158317566 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.9003466367721558, "epoch": 0.82, "learning_rate": 4.125105663567202e-05, "loss": 0.9758, "step": 976, "task_loss": 0.6604939699172974 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.0959017276763916, "epoch": 0.83, "learning_rate": 4.1293322062552834e-05, "loss": 0.9385, "step": 977, "task_loss": 1.63467538356781 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.8077751398086548, "epoch": 0.83, "learning_rate": 4.1335587489433644e-05, "loss": 0.6735, "step": 978, "task_loss": 0.8219801187515259 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.8586176633834839, "epoch": 0.83, "learning_rate": 4.137785291631446e-05, "loss": 0.8983, "step": 979, "task_loss": 0.4462464153766632 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.9896467924118042, "epoch": 0.83, "learning_rate": 4.142011834319527e-05, "loss": 0.954, "step": 980, "task_loss": 1.3744866847991943 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.0606168508529663, "epoch": 0.83, "learning_rate": 4.146238377007608e-05, "loss": 1.1461, "step": 981, "task_loss": 1.6571584939956665 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.8513714075088501, "epoch": 0.83, "learning_rate": 4.150464919695689e-05, "loss": 0.8889, "step": 982, "task_loss": 0.3037635385990143 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.994424045085907, "epoch": 0.83, "learning_rate": 4.1546914623837704e-05, "loss": 0.9041, "step": 983, "task_loss": 1.0255382061004639 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6671987771987915, "epoch": 0.83, "learning_rate": 4.1589180050718514e-05, "loss": 1.1117, "step": 984, "task_loss": 1.1333987712860107 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.7842681407928467, "epoch": 0.83, "learning_rate": 4.163144547759932e-05, "loss": 0.7954, "step": 985, "task_loss": 1.2768142223358154 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.8951166272163391, "epoch": 0.83, "learning_rate": 4.167371090448014e-05, "loss": 0.9686, "step": 986, "task_loss": 1.4309160709381104 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.2169384956359863, "epoch": 0.83, "learning_rate": 4.171597633136095e-05, "loss": 1.0158, "step": 987, "task_loss": 1.3494470119476318 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.4003479480743408, "epoch": 0.83, "learning_rate": 4.1758241758241765e-05, "loss": 1.125, "step": 988, "task_loss": 1.1926672458648682 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.8295202255249023, "epoch": 0.84, "learning_rate": 4.1800507185122574e-05, "loss": 0.7569, "step": 989, "task_loss": 1.34328293800354 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.1668071746826172, "epoch": 0.84, "learning_rate": 4.1842772612003383e-05, "loss": 0.9298, "step": 990, "task_loss": 1.9495867490768433 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5928083658218384, "epoch": 0.84, "learning_rate": 4.188503803888419e-05, "loss": 0.9434, "step": 991, "task_loss": 1.0725996494293213 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5244714021682739, "epoch": 0.84, "learning_rate": 4.1927303465765e-05, "loss": 0.632, "step": 992, "task_loss": 1.2524352073669434 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.2880223989486694, "epoch": 0.84, "learning_rate": 4.196956889264582e-05, "loss": 0.9899, "step": 993, "task_loss": 0.8505713939666748 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.0303956270217896, "epoch": 0.84, "learning_rate": 4.201183431952663e-05, "loss": 0.9589, "step": 994, "task_loss": 1.6729624271392822 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.8796193599700928, "epoch": 0.84, "learning_rate": 4.205409974640744e-05, "loss": 0.8152, "step": 995, "task_loss": 0.8178637623786926 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.541543185710907, "epoch": 0.84, "learning_rate": 4.209636517328825e-05, "loss": 0.7942, "step": 996, "task_loss": 0.6259867548942566 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.8939079642295837, "epoch": 0.84, "learning_rate": 4.213863060016906e-05, "loss": 0.9016, "step": 997, "task_loss": 1.4589275121688843 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.916896641254425, "epoch": 0.84, "learning_rate": 4.218089602704988e-05, "loss": 0.8484, "step": 998, "task_loss": 1.7455095052719116 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.2857661247253418, "epoch": 0.84, "learning_rate": 4.222316145393069e-05, "loss": 0.9986, "step": 999, "task_loss": 1.3231134414672852 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5655268430709839, "epoch": 0.84, "learning_rate": 4.22654268808115e-05, "loss": 0.6823, "step": 1000, "task_loss": 0.7202645540237427 }, { "epoch": 0.84, "eval_accuracy": 0.8818217821782178, "eval_loss": 0.5126578211784363, "eval_runtime": 229.1486, "eval_samples_per_second": 110.191, "eval_steps_per_second": 0.864, "step": 1000 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.647757887840271, "epoch": 0.85, "learning_rate": 4.230769230769231e-05, "loss": 0.7882, "step": 1001, "task_loss": 0.5003728270530701 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.466987669467926, "epoch": 0.85, "learning_rate": 4.234995773457312e-05, "loss": 0.7159, "step": 1002, "task_loss": 0.5549584031105042 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.8620222806930542, "epoch": 0.85, "learning_rate": 4.239222316145393e-05, "loss": 0.8545, "step": 1003, "task_loss": 0.8501796722412109 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.9268772602081299, "epoch": 0.85, "learning_rate": 4.243448858833474e-05, "loss": 0.8878, "step": 1004, "task_loss": 0.720402717590332 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.7705105543136597, "epoch": 0.85, "learning_rate": 4.247675401521555e-05, "loss": 1.0673, "step": 1005, "task_loss": 2.286893606185913 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.1945281028747559, "epoch": 0.85, "learning_rate": 4.251901944209637e-05, "loss": 0.882, "step": 1006, "task_loss": 0.22540704905986786 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.8362153768539429, "epoch": 0.85, "learning_rate": 4.256128486897718e-05, "loss": 0.7769, "step": 1007, "task_loss": 0.527235746383667 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.8700671195983887, "epoch": 0.85, "learning_rate": 4.260355029585799e-05, "loss": 0.8316, "step": 1008, "task_loss": 0.9892043471336365 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.5354052782058716, "epoch": 0.85, "learning_rate": 4.26458157227388e-05, "loss": 1.0411, "step": 1009, "task_loss": 1.6685892343521118 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.7675468921661377, "epoch": 0.85, "learning_rate": 4.268808114961961e-05, "loss": 0.9394, "step": 1010, "task_loss": 1.3933601379394531 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.1413772106170654, "epoch": 0.85, "learning_rate": 4.273034657650042e-05, "loss": 1.0169, "step": 1011, "task_loss": 1.3889970779418945 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.7557093501091003, "epoch": 0.85, "learning_rate": 4.277261200338124e-05, "loss": 0.8941, "step": 1012, "task_loss": 0.30902594327926636 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5545022487640381, "epoch": 0.86, "learning_rate": 4.281487743026205e-05, "loss": 0.6347, "step": 1013, "task_loss": 0.36918920278549194 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.9747594594955444, "epoch": 0.86, "learning_rate": 4.2857142857142856e-05, "loss": 0.906, "step": 1014, "task_loss": 0.7703030109405518 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6983894109725952, "epoch": 0.86, "learning_rate": 4.289940828402367e-05, "loss": 0.7093, "step": 1015, "task_loss": 0.9726892113685608 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6856435537338257, "epoch": 0.86, "learning_rate": 4.294167371090448e-05, "loss": 0.794, "step": 1016, "task_loss": 0.7919455170631409 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6856673955917358, "epoch": 0.86, "learning_rate": 4.29839391377853e-05, "loss": 0.9054, "step": 1017, "task_loss": 0.4258291721343994 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.8028672933578491, "epoch": 0.86, "learning_rate": 4.302620456466611e-05, "loss": 0.7592, "step": 1018, "task_loss": 1.0734472274780273 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.469557523727417, "epoch": 0.86, "learning_rate": 4.306846999154692e-05, "loss": 1.0664, "step": 1019, "task_loss": 1.4430866241455078 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.8346530795097351, "epoch": 0.86, "learning_rate": 4.3110735418427726e-05, "loss": 0.6785, "step": 1020, "task_loss": 1.2720863819122314 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6673634052276611, "epoch": 0.86, "learning_rate": 4.3153000845308536e-05, "loss": 1.1122, "step": 1021, "task_loss": 0.1406627744436264 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6353693604469299, "epoch": 0.86, "learning_rate": 4.319526627218935e-05, "loss": 0.8348, "step": 1022, "task_loss": 0.12585875391960144 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.7626101970672607, "epoch": 0.86, "learning_rate": 4.323753169907016e-05, "loss": 0.7084, "step": 1023, "task_loss": 1.0916800498962402 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.8243899345397949, "epoch": 0.87, "learning_rate": 4.327979712595097e-05, "loss": 0.9452, "step": 1024, "task_loss": 0.2818711996078491 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.7899994850158691, "epoch": 0.87, "learning_rate": 4.332206255283179e-05, "loss": 0.7363, "step": 1025, "task_loss": 0.7793794870376587 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4777749180793762, "epoch": 0.87, "learning_rate": 4.3364327979712596e-05, "loss": 0.8622, "step": 1026, "task_loss": 0.023271748796105385 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.7985219955444336, "epoch": 0.87, "learning_rate": 4.340659340659341e-05, "loss": 0.9553, "step": 1027, "task_loss": 0.6418771147727966 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.9799319505691528, "epoch": 0.87, "learning_rate": 4.344885883347422e-05, "loss": 1.0448, "step": 1028, "task_loss": 1.342651605606079 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5869677662849426, "epoch": 0.87, "learning_rate": 4.349112426035503e-05, "loss": 0.7644, "step": 1029, "task_loss": 0.8527691960334778 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.2063424587249756, "epoch": 0.87, "learning_rate": 4.353338968723584e-05, "loss": 0.8745, "step": 1030, "task_loss": 1.6588696241378784 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.0741658210754395, "epoch": 0.87, "learning_rate": 4.3575655114116657e-05, "loss": 0.9719, "step": 1031, "task_loss": 0.6360454559326172 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.4267752170562744, "epoch": 0.87, "learning_rate": 4.3617920540997466e-05, "loss": 0.9083, "step": 1032, "task_loss": 0.5487968325614929 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5794072151184082, "epoch": 0.87, "learning_rate": 4.3660185967878275e-05, "loss": 1.0125, "step": 1033, "task_loss": 0.320943146944046 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.1888625621795654, "epoch": 0.87, "learning_rate": 4.370245139475909e-05, "loss": 0.9984, "step": 1034, "task_loss": 1.572576642036438 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5843337178230286, "epoch": 0.87, "learning_rate": 4.37447168216399e-05, "loss": 0.737, "step": 1035, "task_loss": 1.1464805603027344 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.1899930238723755, "epoch": 0.88, "learning_rate": 4.378698224852072e-05, "loss": 1.0306, "step": 1036, "task_loss": 0.8967394828796387 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.7702544927597046, "epoch": 0.88, "learning_rate": 4.3829247675401526e-05, "loss": 0.8795, "step": 1037, "task_loss": 1.5231029987335205 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.7108954191207886, "epoch": 0.88, "learning_rate": 4.3871513102282336e-05, "loss": 0.8794, "step": 1038, "task_loss": 0.6973483562469482 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.8070660829544067, "epoch": 0.88, "learning_rate": 4.3913778529163145e-05, "loss": 0.8355, "step": 1039, "task_loss": 0.4528863728046417 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.891578197479248, "epoch": 0.88, "learning_rate": 4.3956043956043955e-05, "loss": 0.9294, "step": 1040, "task_loss": 0.7368393540382385 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.9262855052947998, "epoch": 0.88, "learning_rate": 4.399830938292477e-05, "loss": 1.1193, "step": 1041, "task_loss": 0.9151571989059448 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6654446125030518, "epoch": 0.88, "learning_rate": 4.404057480980558e-05, "loss": 0.6845, "step": 1042, "task_loss": 1.1652885675430298 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.917605459690094, "epoch": 0.88, "learning_rate": 4.408284023668639e-05, "loss": 0.9026, "step": 1043, "task_loss": 0.699445903301239 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6230236887931824, "epoch": 0.88, "learning_rate": 4.4125105663567206e-05, "loss": 0.8428, "step": 1044, "task_loss": 0.5558046102523804 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5473418235778809, "epoch": 0.88, "learning_rate": 4.4167371090448015e-05, "loss": 0.935, "step": 1045, "task_loss": 0.723911464214325 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.5218133926391602, "epoch": 0.88, "learning_rate": 4.420963651732883e-05, "loss": 1.1784, "step": 1046, "task_loss": 1.442440152168274 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.523809015750885, "epoch": 0.88, "learning_rate": 4.425190194420964e-05, "loss": 1.159, "step": 1047, "task_loss": 0.2729208171367645 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5910937786102295, "epoch": 0.89, "learning_rate": 4.429416737109045e-05, "loss": 0.8459, "step": 1048, "task_loss": 0.7357900738716125 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6066716909408569, "epoch": 0.89, "learning_rate": 4.433643279797126e-05, "loss": 0.8672, "step": 1049, "task_loss": 0.8049559593200684 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.7794140577316284, "epoch": 0.89, "learning_rate": 4.437869822485207e-05, "loss": 0.9468, "step": 1050, "task_loss": 0.7368866205215454 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5288619995117188, "epoch": 0.89, "learning_rate": 4.4420963651732885e-05, "loss": 0.7537, "step": 1051, "task_loss": 0.6331936717033386 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.1077601909637451, "epoch": 0.89, "learning_rate": 4.4463229078613694e-05, "loss": 0.96, "step": 1052, "task_loss": 0.5554484128952026 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.9439190626144409, "epoch": 0.89, "learning_rate": 4.4505494505494504e-05, "loss": 0.7444, "step": 1053, "task_loss": 1.0459246635437012 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.8643949031829834, "epoch": 0.89, "learning_rate": 4.454775993237532e-05, "loss": 0.9363, "step": 1054, "task_loss": 1.070670485496521 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.43990471959114075, "epoch": 0.89, "learning_rate": 4.459002535925613e-05, "loss": 0.7478, "step": 1055, "task_loss": 0.27512404322624207 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.2374026775360107, "epoch": 0.89, "learning_rate": 4.4632290786136946e-05, "loss": 0.9227, "step": 1056, "task_loss": 1.3580701351165771 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 2.390275716781616, "epoch": 0.89, "learning_rate": 4.4674556213017755e-05, "loss": 1.0513, "step": 1057, "task_loss": 1.2459529638290405 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.0342093706130981, "epoch": 0.89, "learning_rate": 4.4716821639898564e-05, "loss": 0.8001, "step": 1058, "task_loss": 1.5110549926757812 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4645204246044159, "epoch": 0.89, "learning_rate": 4.4759087066779374e-05, "loss": 0.7747, "step": 1059, "task_loss": 0.2645789384841919 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.2553523778915405, "epoch": 0.9, "learning_rate": 4.480135249366019e-05, "loss": 0.9667, "step": 1060, "task_loss": 1.6067612171173096 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5791783928871155, "epoch": 0.9, "learning_rate": 4.4843617920541e-05, "loss": 0.8263, "step": 1061, "task_loss": 0.5787603259086609 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.1135956048965454, "epoch": 0.9, "learning_rate": 4.488588334742181e-05, "loss": 0.9713, "step": 1062, "task_loss": 1.0425268411636353 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.0000865459442139, "epoch": 0.9, "learning_rate": 4.4928148774302625e-05, "loss": 0.822, "step": 1063, "task_loss": 0.6935333013534546 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.6616714000701904, "epoch": 0.9, "learning_rate": 4.4970414201183434e-05, "loss": 1.0247, "step": 1064, "task_loss": 1.922581434249878 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.9025981426239014, "epoch": 0.9, "learning_rate": 4.501267962806425e-05, "loss": 0.902, "step": 1065, "task_loss": 0.7872021198272705 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.1003330945968628, "epoch": 0.9, "learning_rate": 4.505494505494506e-05, "loss": 1.0747, "step": 1066, "task_loss": 0.3769143223762512 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5716548562049866, "epoch": 0.9, "learning_rate": 4.509721048182587e-05, "loss": 0.8985, "step": 1067, "task_loss": 0.5844125151634216 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.0084329843521118, "epoch": 0.9, "learning_rate": 4.513947590870668e-05, "loss": 0.7896, "step": 1068, "task_loss": 0.9215744733810425 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4554329812526703, "epoch": 0.9, "learning_rate": 4.518174133558749e-05, "loss": 0.6577, "step": 1069, "task_loss": 0.7598364353179932 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.9826257824897766, "epoch": 0.9, "learning_rate": 4.5224006762468304e-05, "loss": 0.8512, "step": 1070, "task_loss": 0.9104565382003784 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.8501220345497131, "epoch": 0.9, "learning_rate": 4.5266272189349114e-05, "loss": 0.8118, "step": 1071, "task_loss": 0.7907831072807312 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.846510112285614, "epoch": 0.91, "learning_rate": 4.530853761622992e-05, "loss": 0.9324, "step": 1072, "task_loss": 1.0277478694915771 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.7719987630844116, "epoch": 0.91, "learning_rate": 4.535080304311074e-05, "loss": 0.912, "step": 1073, "task_loss": 0.6699872612953186 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.2992414236068726, "epoch": 0.91, "learning_rate": 4.539306846999155e-05, "loss": 0.7656, "step": 1074, "task_loss": 0.9048945307731628 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.8488128781318665, "epoch": 0.91, "learning_rate": 4.5435333896872365e-05, "loss": 0.9368, "step": 1075, "task_loss": 0.5284009575843811 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.7792898416519165, "epoch": 0.91, "learning_rate": 4.5477599323753174e-05, "loss": 1.1859, "step": 1076, "task_loss": 0.6506417989730835 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.8472839593887329, "epoch": 0.91, "learning_rate": 4.5519864750633983e-05, "loss": 0.9629, "step": 1077, "task_loss": 1.2173994779586792 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.46597281098365784, "epoch": 0.91, "learning_rate": 4.556213017751479e-05, "loss": 0.6591, "step": 1078, "task_loss": 0.7137371897697449 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6434260010719299, "epoch": 0.91, "learning_rate": 4.56043956043956e-05, "loss": 0.853, "step": 1079, "task_loss": 0.5525861978530884 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.7839975953102112, "epoch": 0.91, "learning_rate": 4.564666103127642e-05, "loss": 0.998, "step": 1080, "task_loss": 0.9719991683959961 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.0682356357574463, "epoch": 0.91, "learning_rate": 4.568892645815723e-05, "loss": 1.059, "step": 1081, "task_loss": 1.2033246755599976 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.0434798002243042, "epoch": 0.91, "learning_rate": 4.573119188503804e-05, "loss": 0.9648, "step": 1082, "task_loss": 1.0331995487213135 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5535196661949158, "epoch": 0.91, "learning_rate": 4.577345731191885e-05, "loss": 0.8531, "step": 1083, "task_loss": 0.4642697870731354 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.7953972220420837, "epoch": 0.92, "learning_rate": 4.581572273879966e-05, "loss": 1.0091, "step": 1084, "task_loss": 0.6886611580848694 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6668606400489807, "epoch": 0.92, "learning_rate": 4.585798816568048e-05, "loss": 0.9639, "step": 1085, "task_loss": 0.9323781728744507 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6567552089691162, "epoch": 0.92, "learning_rate": 4.590025359256129e-05, "loss": 0.6679, "step": 1086, "task_loss": 1.1746405363082886 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.3561595678329468, "epoch": 0.92, "learning_rate": 4.59425190194421e-05, "loss": 0.9204, "step": 1087, "task_loss": 1.1490095853805542 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.9540305137634277, "epoch": 0.92, "learning_rate": 4.598478444632291e-05, "loss": 0.9032, "step": 1088, "task_loss": 0.7797126173973083 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.8380683660507202, "epoch": 0.92, "learning_rate": 4.602704987320372e-05, "loss": 0.7912, "step": 1089, "task_loss": 0.8965733647346497 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.162031650543213, "epoch": 0.92, "learning_rate": 4.606931530008453e-05, "loss": 0.7823, "step": 1090, "task_loss": 1.0961861610412598 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6758277416229248, "epoch": 0.92, "learning_rate": 4.611158072696534e-05, "loss": 0.9393, "step": 1091, "task_loss": 1.8535586595535278 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.8472609519958496, "epoch": 0.92, "learning_rate": 4.615384615384616e-05, "loss": 0.7982, "step": 1092, "task_loss": 0.9317784309387207 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.3202778100967407, "epoch": 0.92, "learning_rate": 4.619611158072697e-05, "loss": 0.9378, "step": 1093, "task_loss": 1.2962573766708374 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.0958589315414429, "epoch": 0.92, "learning_rate": 4.6238377007607784e-05, "loss": 0.8867, "step": 1094, "task_loss": 0.39049527049064636 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.743079662322998, "epoch": 0.93, "learning_rate": 4.628064243448859e-05, "loss": 0.6614, "step": 1095, "task_loss": 0.8939529657363892 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6313505172729492, "epoch": 0.93, "learning_rate": 4.63229078613694e-05, "loss": 0.8634, "step": 1096, "task_loss": 0.21062703430652618 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.1305420398712158, "epoch": 0.93, "learning_rate": 4.636517328825021e-05, "loss": 0.9591, "step": 1097, "task_loss": 1.2483044862747192 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.773510217666626, "epoch": 0.93, "learning_rate": 4.640743871513102e-05, "loss": 0.6836, "step": 1098, "task_loss": 0.8179703950881958 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.9545236825942993, "epoch": 0.93, "learning_rate": 4.644970414201184e-05, "loss": 0.8491, "step": 1099, "task_loss": 1.3345659971237183 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.9103188514709473, "epoch": 0.93, "learning_rate": 4.649196956889265e-05, "loss": 0.9469, "step": 1100, "task_loss": 0.6113436222076416 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.858731746673584, "epoch": 0.93, "learning_rate": 4.6534234995773456e-05, "loss": 0.9865, "step": 1101, "task_loss": 0.45781344175338745 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.1944605112075806, "epoch": 0.93, "learning_rate": 4.657650042265427e-05, "loss": 0.865, "step": 1102, "task_loss": 0.811022162437439 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5870804786682129, "epoch": 0.93, "learning_rate": 4.661876584953508e-05, "loss": 0.6043, "step": 1103, "task_loss": 0.07904958724975586 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.266842007637024, "epoch": 0.93, "learning_rate": 4.66610312764159e-05, "loss": 0.9609, "step": 1104, "task_loss": 1.3891403675079346 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.146797776222229, "epoch": 0.93, "learning_rate": 4.670329670329671e-05, "loss": 0.9449, "step": 1105, "task_loss": 1.8598222732543945 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.4028728008270264, "epoch": 0.93, "learning_rate": 4.674556213017752e-05, "loss": 0.9866, "step": 1106, "task_loss": 1.2189751863479614 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.7495356798171997, "epoch": 0.94, "learning_rate": 4.6787827557058326e-05, "loss": 0.9477, "step": 1107, "task_loss": 0.49210137128829956 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.3993257284164429, "epoch": 0.94, "learning_rate": 4.683009298393914e-05, "loss": 0.9121, "step": 1108, "task_loss": 1.292222499847412 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5882537961006165, "epoch": 0.94, "learning_rate": 4.687235841081995e-05, "loss": 0.8171, "step": 1109, "task_loss": 0.4914180338382721 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5512704253196716, "epoch": 0.94, "learning_rate": 4.691462383770076e-05, "loss": 0.592, "step": 1110, "task_loss": 0.6743378043174744 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.005277156829834, "epoch": 0.94, "learning_rate": 4.695688926458158e-05, "loss": 0.9265, "step": 1111, "task_loss": 0.5963150858879089 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.7325995564460754, "epoch": 0.94, "learning_rate": 4.6999154691462387e-05, "loss": 0.8939, "step": 1112, "task_loss": 1.3621599674224854 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.973212480545044, "epoch": 0.94, "learning_rate": 4.7041420118343196e-05, "loss": 0.9879, "step": 1113, "task_loss": 0.654776930809021 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.0871949195861816, "epoch": 0.94, "learning_rate": 4.708368554522401e-05, "loss": 1.1087, "step": 1114, "task_loss": 0.9001259803771973 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4848073720932007, "epoch": 0.94, "learning_rate": 4.712595097210482e-05, "loss": 0.6826, "step": 1115, "task_loss": 0.11071043461561203 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.666934072971344, "epoch": 0.94, "learning_rate": 4.716821639898563e-05, "loss": 0.6637, "step": 1116, "task_loss": 0.8432983160018921 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.822002112865448, "epoch": 0.94, "learning_rate": 4.721048182586644e-05, "loss": 0.8006, "step": 1117, "task_loss": 0.7242121696472168 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.9545989036560059, "epoch": 0.94, "learning_rate": 4.7252747252747257e-05, "loss": 0.715, "step": 1118, "task_loss": 0.7143362164497375 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5905972719192505, "epoch": 0.95, "learning_rate": 4.7295012679628066e-05, "loss": 0.7469, "step": 1119, "task_loss": 0.33989161252975464 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.7058432698249817, "epoch": 0.95, "learning_rate": 4.7337278106508875e-05, "loss": 0.6912, "step": 1120, "task_loss": 0.36675822734832764 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.8441854119300842, "epoch": 0.95, "learning_rate": 4.737954353338969e-05, "loss": 0.6749, "step": 1121, "task_loss": 0.8376095294952393 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5611559152603149, "epoch": 0.95, "learning_rate": 4.74218089602705e-05, "loss": 0.87, "step": 1122, "task_loss": 0.19328813254833221 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.9309321045875549, "epoch": 0.95, "learning_rate": 4.746407438715132e-05, "loss": 0.6955, "step": 1123, "task_loss": 0.7284340858459473 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.8362783193588257, "epoch": 0.95, "learning_rate": 4.7506339814032126e-05, "loss": 0.8261, "step": 1124, "task_loss": 0.6988627314567566 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5206829309463501, "epoch": 0.95, "learning_rate": 4.7548605240912936e-05, "loss": 0.6633, "step": 1125, "task_loss": 0.3773687779903412 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.7864575982093811, "epoch": 0.95, "learning_rate": 4.7590870667793745e-05, "loss": 0.9133, "step": 1126, "task_loss": 1.050910234451294 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.12627112865448, "epoch": 0.95, "learning_rate": 4.7633136094674555e-05, "loss": 0.831, "step": 1127, "task_loss": 1.7597535848617554 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6861032247543335, "epoch": 0.95, "learning_rate": 4.767540152155537e-05, "loss": 0.9403, "step": 1128, "task_loss": 1.1008251905441284 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.9799793362617493, "epoch": 0.95, "learning_rate": 4.771766694843618e-05, "loss": 0.7904, "step": 1129, "task_loss": 1.340074062347412 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.9191635847091675, "epoch": 0.95, "learning_rate": 4.775993237531699e-05, "loss": 0.7899, "step": 1130, "task_loss": 1.689366340637207 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3684549331665039, "epoch": 0.96, "learning_rate": 4.7802197802197806e-05, "loss": 0.635, "step": 1131, "task_loss": 0.11720118671655655 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6864452362060547, "epoch": 0.96, "learning_rate": 4.7844463229078615e-05, "loss": 0.9507, "step": 1132, "task_loss": 0.3467939794063568 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5363916754722595, "epoch": 0.96, "learning_rate": 4.788672865595943e-05, "loss": 0.8919, "step": 1133, "task_loss": 1.0830916166305542 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.7095955610275269, "epoch": 0.96, "learning_rate": 4.792899408284024e-05, "loss": 0.8856, "step": 1134, "task_loss": 0.47506776452064514 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.1227432489395142, "epoch": 0.96, "learning_rate": 4.797125950972105e-05, "loss": 1.0267, "step": 1135, "task_loss": 1.6771633625030518 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.8548144102096558, "epoch": 0.96, "learning_rate": 4.801352493660186e-05, "loss": 0.8635, "step": 1136, "task_loss": 1.148176670074463 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6977930665016174, "epoch": 0.96, "learning_rate": 4.8055790363482676e-05, "loss": 0.7531, "step": 1137, "task_loss": 0.8055868148803711 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.9715834259986877, "epoch": 0.96, "learning_rate": 4.8098055790363485e-05, "loss": 0.7051, "step": 1138, "task_loss": 0.6085440516471863 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5230078101158142, "epoch": 0.96, "learning_rate": 4.8140321217244294e-05, "loss": 0.9686, "step": 1139, "task_loss": 1.2118300199508667 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.4384773969650269, "epoch": 0.96, "learning_rate": 4.818258664412511e-05, "loss": 1.0679, "step": 1140, "task_loss": 0.5830682516098022 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.0432379245758057, "epoch": 0.96, "learning_rate": 4.822485207100592e-05, "loss": 0.8904, "step": 1141, "task_loss": 1.2358118295669556 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.752661943435669, "epoch": 0.96, "learning_rate": 4.826711749788673e-05, "loss": 0.8425, "step": 1142, "task_loss": 1.4105759859085083 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.66182541847229, "epoch": 0.97, "learning_rate": 4.8309382924767545e-05, "loss": 0.8207, "step": 1143, "task_loss": 0.6930851340293884 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.8332805633544922, "epoch": 0.97, "learning_rate": 4.8351648351648355e-05, "loss": 0.7004, "step": 1144, "task_loss": 0.6605567336082458 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.7746790647506714, "epoch": 0.97, "learning_rate": 4.8393913778529164e-05, "loss": 0.652, "step": 1145, "task_loss": 0.5090996026992798 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.7387020587921143, "epoch": 0.97, "learning_rate": 4.8436179205409974e-05, "loss": 0.9608, "step": 1146, "task_loss": 1.7854914665222168 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4871561527252197, "epoch": 0.97, "learning_rate": 4.847844463229079e-05, "loss": 0.7618, "step": 1147, "task_loss": 0.32913532853126526 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.7172813415527344, "epoch": 0.97, "learning_rate": 4.85207100591716e-05, "loss": 0.874, "step": 1148, "task_loss": 0.5674693584442139 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.047457218170166, "epoch": 0.97, "learning_rate": 4.856297548605241e-05, "loss": 0.7684, "step": 1149, "task_loss": 1.448573112487793 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.1701298952102661, "epoch": 0.97, "learning_rate": 4.8605240912933225e-05, "loss": 0.8693, "step": 1150, "task_loss": 1.59458327293396 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.786941647529602, "epoch": 0.97, "learning_rate": 4.8647506339814034e-05, "loss": 0.7571, "step": 1151, "task_loss": 1.1657963991165161 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.8480064868927002, "epoch": 0.97, "learning_rate": 4.868977176669485e-05, "loss": 0.7054, "step": 1152, "task_loss": 0.4873350262641907 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4961494207382202, "epoch": 0.97, "learning_rate": 4.873203719357566e-05, "loss": 0.8035, "step": 1153, "task_loss": 0.6741071939468384 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.42167800664901733, "epoch": 0.97, "learning_rate": 4.877430262045647e-05, "loss": 0.7644, "step": 1154, "task_loss": 0.5470255017280579 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.9479389190673828, "epoch": 0.98, "learning_rate": 4.881656804733728e-05, "loss": 0.8436, "step": 1155, "task_loss": 1.6125448942184448 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6049249172210693, "epoch": 0.98, "learning_rate": 4.885883347421809e-05, "loss": 0.6315, "step": 1156, "task_loss": 0.4420894384384155 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5910985469818115, "epoch": 0.98, "learning_rate": 4.8901098901098904e-05, "loss": 0.725, "step": 1157, "task_loss": 0.3695553243160248 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6824641227722168, "epoch": 0.98, "learning_rate": 4.8943364327979713e-05, "loss": 0.7071, "step": 1158, "task_loss": 0.424686998128891 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5451250672340393, "epoch": 0.98, "learning_rate": 4.898562975486053e-05, "loss": 0.8143, "step": 1159, "task_loss": 0.49506431818008423 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6511393189430237, "epoch": 0.98, "learning_rate": 4.902789518174134e-05, "loss": 0.8072, "step": 1160, "task_loss": 0.8593818545341492 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6046502590179443, "epoch": 0.98, "learning_rate": 4.907016060862215e-05, "loss": 0.7687, "step": 1161, "task_loss": 0.34596338868141174 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.1764739751815796, "epoch": 0.98, "learning_rate": 4.9112426035502965e-05, "loss": 0.9759, "step": 1162, "task_loss": 1.2387423515319824 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.9606496095657349, "epoch": 0.98, "learning_rate": 4.9154691462383774e-05, "loss": 0.9379, "step": 1163, "task_loss": 1.2935922145843506 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.7973233461380005, "epoch": 0.98, "learning_rate": 4.919695688926458e-05, "loss": 0.8229, "step": 1164, "task_loss": 1.3531285524368286 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.7259988188743591, "epoch": 0.98, "learning_rate": 4.923922231614539e-05, "loss": 0.6551, "step": 1165, "task_loss": 0.6615791320800781 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.0015661716461182, "epoch": 0.99, "learning_rate": 4.928148774302621e-05, "loss": 0.9141, "step": 1166, "task_loss": 0.7225884199142456 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6484079957008362, "epoch": 0.99, "learning_rate": 4.932375316990702e-05, "loss": 0.702, "step": 1167, "task_loss": 0.47099247574806213 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.7755736112594604, "epoch": 0.99, "learning_rate": 4.936601859678783e-05, "loss": 0.8777, "step": 1168, "task_loss": 1.3386353254318237 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.1050901412963867, "epoch": 0.99, "learning_rate": 4.9408284023668644e-05, "loss": 0.911, "step": 1169, "task_loss": 0.9388408660888672 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.0147496461868286, "epoch": 0.99, "learning_rate": 4.945054945054945e-05, "loss": 0.6335, "step": 1170, "task_loss": 0.38833218812942505 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6743987798690796, "epoch": 0.99, "learning_rate": 4.949281487743026e-05, "loss": 0.7153, "step": 1171, "task_loss": 0.17356200516223907 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.4332040548324585, "epoch": 0.99, "learning_rate": 4.953508030431108e-05, "loss": 0.8204, "step": 1172, "task_loss": 1.4597587585449219 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4684329926967621, "epoch": 0.99, "learning_rate": 4.957734573119189e-05, "loss": 0.8475, "step": 1173, "task_loss": 0.19058871269226074 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.02614164352417, "epoch": 0.99, "learning_rate": 4.96196111580727e-05, "loss": 0.9345, "step": 1174, "task_loss": 1.030394434928894 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.20999276638031, "epoch": 0.99, "learning_rate": 4.966187658495351e-05, "loss": 0.998, "step": 1175, "task_loss": 1.2831350564956665 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.1119331121444702, "epoch": 0.99, "learning_rate": 4.970414201183432e-05, "loss": 0.868, "step": 1176, "task_loss": 0.5656747221946716 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.580865740776062, "epoch": 0.99, "learning_rate": 4.974640743871513e-05, "loss": 0.6226, "step": 1177, "task_loss": 0.8677773475646973 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.4530701637268066, "epoch": 1.0, "learning_rate": 4.978867286559594e-05, "loss": 1.1559, "step": 1178, "task_loss": 1.3196793794631958 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6576048731803894, "epoch": 1.0, "learning_rate": 4.983093829247676e-05, "loss": 0.7788, "step": 1179, "task_loss": 1.4874781370162964 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6078149080276489, "epoch": 1.0, "learning_rate": 4.987320371935757e-05, "loss": 0.6888, "step": 1180, "task_loss": 0.5247536897659302 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.8011260628700256, "epoch": 1.0, "learning_rate": 4.9915469146238384e-05, "loss": 0.8736, "step": 1181, "task_loss": 1.6819392442703247 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6642876863479614, "epoch": 1.0, "learning_rate": 4.995773457311919e-05, "loss": 0.8511, "step": 1182, "task_loss": 0.22670377790927887 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.7785025835037231, "epoch": 1.0, "learning_rate": 5e-05, "loss": 0.8961, "step": 1183, "task_loss": 0.6874377727508545 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.0219122171401978, "epoch": 1.0, "learning_rate": 4.999530384145769e-05, "loss": 1.4442, "step": 1184, "task_loss": 0.3908257782459259 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6700040102005005, "epoch": 1.0, "learning_rate": 4.999060768291538e-05, "loss": 0.8876, "step": 1185, "task_loss": 0.43625307083129883 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.7480214834213257, "epoch": 1.0, "learning_rate": 4.998591152437306e-05, "loss": 0.7905, "step": 1186, "task_loss": 1.7818138599395752 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.9560362100601196, "epoch": 1.0, "learning_rate": 4.9981215365830755e-05, "loss": 0.8194, "step": 1187, "task_loss": 1.0421855449676514 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.7198781967163086, "epoch": 1.0, "learning_rate": 4.997651920728844e-05, "loss": 0.8554, "step": 1188, "task_loss": 0.8423463106155396 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.4338092803955078, "epoch": 1.01, "learning_rate": 4.997182304874613e-05, "loss": 0.8194, "step": 1189, "task_loss": 0.9605714678764343 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.7157037258148193, "epoch": 1.01, "learning_rate": 4.9967126890203814e-05, "loss": 0.7299, "step": 1190, "task_loss": 0.4957178235054016 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.2886805534362793, "epoch": 1.01, "learning_rate": 4.99624307316615e-05, "loss": 0.8575, "step": 1191, "task_loss": 1.5501511096954346 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5618396997451782, "epoch": 1.01, "learning_rate": 4.995773457311919e-05, "loss": 0.6364, "step": 1192, "task_loss": 0.4950997829437256 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5813946723937988, "epoch": 1.01, "learning_rate": 4.995303841457688e-05, "loss": 0.7186, "step": 1193, "task_loss": 0.4804908335208893 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6791324615478516, "epoch": 1.01, "learning_rate": 4.9948342256034566e-05, "loss": 0.8508, "step": 1194, "task_loss": 0.9111616611480713 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.611919641494751, "epoch": 1.01, "learning_rate": 4.994364609749225e-05, "loss": 0.8471, "step": 1195, "task_loss": 0.5761310458183289 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.8236588835716248, "epoch": 1.01, "learning_rate": 4.993894993894994e-05, "loss": 0.8317, "step": 1196, "task_loss": 0.22680160403251648 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.399686574935913, "epoch": 1.01, "learning_rate": 4.993425378040763e-05, "loss": 0.817, "step": 1197, "task_loss": 1.0075557231903076 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5247260928153992, "epoch": 1.01, "learning_rate": 4.992955762186531e-05, "loss": 0.6365, "step": 1198, "task_loss": 0.9007152915000916 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.45795583724975586, "epoch": 1.01, "learning_rate": 4.9924861463323004e-05, "loss": 0.4638, "step": 1199, "task_loss": 0.4552762508392334 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.1437170505523682, "epoch": 1.01, "learning_rate": 4.992016530478069e-05, "loss": 0.9732, "step": 1200, "task_loss": 0.5305445194244385 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.7056131958961487, "epoch": 1.02, "learning_rate": 4.9915469146238384e-05, "loss": 0.8853, "step": 1201, "task_loss": 0.426228791475296 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.8267409801483154, "epoch": 1.02, "learning_rate": 4.991077298769607e-05, "loss": 0.7443, "step": 1202, "task_loss": 0.4927351772785187 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5581204295158386, "epoch": 1.02, "learning_rate": 4.990607682915375e-05, "loss": 0.7433, "step": 1203, "task_loss": 0.5641399621963501 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.8545762300491333, "epoch": 1.02, "learning_rate": 4.990138067061144e-05, "loss": 0.8288, "step": 1204, "task_loss": 0.9960431456565857 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.0042822360992432, "epoch": 1.02, "learning_rate": 4.989668451206913e-05, "loss": 0.7762, "step": 1205, "task_loss": 0.7500176429748535 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.720524787902832, "epoch": 1.02, "learning_rate": 4.989198835352682e-05, "loss": 0.9325, "step": 1206, "task_loss": 1.6345655918121338 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.5197741985321045, "epoch": 1.02, "learning_rate": 4.98872921949845e-05, "loss": 0.9269, "step": 1207, "task_loss": 0.77543705701828 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.7119118571281433, "epoch": 1.02, "learning_rate": 4.9882596036442195e-05, "loss": 0.9409, "step": 1208, "task_loss": 1.2722465991973877 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6984691619873047, "epoch": 1.02, "learning_rate": 4.987789987789988e-05, "loss": 0.6093, "step": 1209, "task_loss": 1.0201630592346191 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.8998209834098816, "epoch": 1.02, "learning_rate": 4.987320371935757e-05, "loss": 0.8854, "step": 1210, "task_loss": 1.6183542013168335 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.9749068021774292, "epoch": 1.02, "learning_rate": 4.9868507560815254e-05, "loss": 0.8991, "step": 1211, "task_loss": 1.542296290397644 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.0921818017959595, "epoch": 1.02, "learning_rate": 4.986381140227294e-05, "loss": 0.9252, "step": 1212, "task_loss": 1.5315409898757935 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.46266135573387146, "epoch": 1.03, "learning_rate": 4.985911524373063e-05, "loss": 0.5816, "step": 1213, "task_loss": 0.39799585938453674 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.7842148542404175, "epoch": 1.03, "learning_rate": 4.985441908518832e-05, "loss": 0.8899, "step": 1214, "task_loss": 0.623865008354187 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.7924678325653076, "epoch": 1.03, "learning_rate": 4.9849722926646006e-05, "loss": 0.6813, "step": 1215, "task_loss": 1.4836411476135254 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.7346034646034241, "epoch": 1.03, "learning_rate": 4.984502676810369e-05, "loss": 0.8161, "step": 1216, "task_loss": 0.911013662815094 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6410455703735352, "epoch": 1.03, "learning_rate": 4.984033060956138e-05, "loss": 0.7357, "step": 1217, "task_loss": 0.21396853029727936 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.7787551879882812, "epoch": 1.03, "learning_rate": 4.983563445101907e-05, "loss": 0.7078, "step": 1218, "task_loss": 2.689532518386841 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6562471389770508, "epoch": 1.03, "learning_rate": 4.983093829247676e-05, "loss": 0.7621, "step": 1219, "task_loss": 0.6049256920814514 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.47610753774642944, "epoch": 1.03, "learning_rate": 4.9826242133934444e-05, "loss": 0.6722, "step": 1220, "task_loss": 0.581636905670166 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.7685062289237976, "epoch": 1.03, "learning_rate": 4.982154597539213e-05, "loss": 0.647, "step": 1221, "task_loss": 1.3361307382583618 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6156908273696899, "epoch": 1.03, "learning_rate": 4.981684981684982e-05, "loss": 0.6639, "step": 1222, "task_loss": 0.9097985029220581 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6651860475540161, "epoch": 1.03, "learning_rate": 4.981215365830751e-05, "loss": 0.5451, "step": 1223, "task_loss": 0.1376887410879135 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.9289253354072571, "epoch": 1.03, "learning_rate": 4.980745749976519e-05, "loss": 0.8983, "step": 1224, "task_loss": 1.403479814529419 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4269893765449524, "epoch": 1.04, "learning_rate": 4.980276134122288e-05, "loss": 0.5757, "step": 1225, "task_loss": 0.4627838432788849 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4629426598548889, "epoch": 1.04, "learning_rate": 4.979806518268057e-05, "loss": 0.6944, "step": 1226, "task_loss": 0.44136321544647217 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.8676228523254395, "epoch": 1.04, "learning_rate": 4.9793369024138256e-05, "loss": 0.8026, "step": 1227, "task_loss": 1.024471640586853 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.0402860641479492, "epoch": 1.04, "learning_rate": 4.978867286559594e-05, "loss": 0.7979, "step": 1228, "task_loss": 1.2598527669906616 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6387060880661011, "epoch": 1.04, "learning_rate": 4.978397670705363e-05, "loss": 0.6613, "step": 1229, "task_loss": 0.3575829863548279 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5857902765274048, "epoch": 1.04, "learning_rate": 4.977928054851132e-05, "loss": 0.7542, "step": 1230, "task_loss": 0.5660538077354431 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.863217830657959, "epoch": 1.04, "learning_rate": 4.977458438996901e-05, "loss": 0.7411, "step": 1231, "task_loss": 0.7006186246871948 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.8919067978858948, "epoch": 1.04, "learning_rate": 4.97698882314267e-05, "loss": 0.7488, "step": 1232, "task_loss": 0.7664223909378052 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.7116492390632629, "epoch": 1.04, "learning_rate": 4.976519207288438e-05, "loss": 0.6764, "step": 1233, "task_loss": 1.759402871131897 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.7736808061599731, "epoch": 1.04, "learning_rate": 4.9760495914342073e-05, "loss": 0.8395, "step": 1234, "task_loss": 0.8507089018821716 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6067366003990173, "epoch": 1.04, "learning_rate": 4.975579975579976e-05, "loss": 0.7065, "step": 1235, "task_loss": 1.3265104293823242 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5824638605117798, "epoch": 1.04, "learning_rate": 4.9751103597257446e-05, "loss": 0.6792, "step": 1236, "task_loss": 1.2145124673843384 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5448690056800842, "epoch": 1.05, "learning_rate": 4.974640743871513e-05, "loss": 0.5802, "step": 1237, "task_loss": 1.0803186893463135 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.3911209106445312, "epoch": 1.05, "learning_rate": 4.974171128017282e-05, "loss": 1.0132, "step": 1238, "task_loss": 1.088618278503418 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6184315085411072, "epoch": 1.05, "learning_rate": 4.973701512163051e-05, "loss": 0.9596, "step": 1239, "task_loss": 0.1925496757030487 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.7387796640396118, "epoch": 1.05, "learning_rate": 4.97323189630882e-05, "loss": 0.805, "step": 1240, "task_loss": 0.52329421043396 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5322288870811462, "epoch": 1.05, "learning_rate": 4.9727622804545885e-05, "loss": 0.63, "step": 1241, "task_loss": 0.03427287936210632 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5693274736404419, "epoch": 1.05, "learning_rate": 4.972292664600357e-05, "loss": 0.6606, "step": 1242, "task_loss": 0.47436222434043884 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.8992235064506531, "epoch": 1.05, "learning_rate": 4.971823048746126e-05, "loss": 0.6537, "step": 1243, "task_loss": 1.319307565689087 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5618818998336792, "epoch": 1.05, "learning_rate": 4.971353432891895e-05, "loss": 0.5835, "step": 1244, "task_loss": 0.5415380001068115 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.9810945987701416, "epoch": 1.05, "learning_rate": 4.970883817037664e-05, "loss": 0.8421, "step": 1245, "task_loss": 1.4955774545669556 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4485442340373993, "epoch": 1.05, "learning_rate": 4.970414201183432e-05, "loss": 0.7466, "step": 1246, "task_loss": 0.7752739191055298 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4207412302494049, "epoch": 1.05, "learning_rate": 4.969944585329201e-05, "loss": 0.72, "step": 1247, "task_loss": 0.2097749412059784 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.7100080847740173, "epoch": 1.05, "learning_rate": 4.9694749694749696e-05, "loss": 0.7058, "step": 1248, "task_loss": 0.5616713762283325 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.0599325895309448, "epoch": 1.06, "learning_rate": 4.969005353620739e-05, "loss": 0.6725, "step": 1249, "task_loss": 1.0976859331130981 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.7977098226547241, "epoch": 1.06, "learning_rate": 4.968535737766507e-05, "loss": 0.8266, "step": 1250, "task_loss": 1.1248180866241455 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.7050226330757141, "epoch": 1.06, "learning_rate": 4.968066121912276e-05, "loss": 0.6759, "step": 1251, "task_loss": 0.8097040057182312 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.9294298887252808, "epoch": 1.06, "learning_rate": 4.967596506058045e-05, "loss": 0.8009, "step": 1252, "task_loss": 1.0824682712554932 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.8007803559303284, "epoch": 1.06, "learning_rate": 4.9671268902038134e-05, "loss": 0.7981, "step": 1253, "task_loss": 1.6562753915786743 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6191568970680237, "epoch": 1.06, "learning_rate": 4.966657274349582e-05, "loss": 0.7563, "step": 1254, "task_loss": 1.4832048416137695 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5116437673568726, "epoch": 1.06, "learning_rate": 4.966187658495351e-05, "loss": 0.5793, "step": 1255, "task_loss": 0.07216201722621918 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6266251802444458, "epoch": 1.06, "learning_rate": 4.96571804264112e-05, "loss": 0.5851, "step": 1256, "task_loss": 0.587835967540741 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4913870394229889, "epoch": 1.06, "learning_rate": 4.9652484267868886e-05, "loss": 0.6983, "step": 1257, "task_loss": 0.31397953629493713 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.7225763201713562, "epoch": 1.06, "learning_rate": 4.964778810932657e-05, "loss": 0.7204, "step": 1258, "task_loss": 1.4201581478118896 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.35853835940361023, "epoch": 1.06, "learning_rate": 4.964309195078426e-05, "loss": 0.5001, "step": 1259, "task_loss": 0.052865203469991684 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.9247846007347107, "epoch": 1.07, "learning_rate": 4.9638395792241945e-05, "loss": 0.9588, "step": 1260, "task_loss": 0.70647132396698 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6399618983268738, "epoch": 1.07, "learning_rate": 4.963369963369964e-05, "loss": 0.6726, "step": 1261, "task_loss": 1.4716986417770386 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6145834922790527, "epoch": 1.07, "learning_rate": 4.9629003475157325e-05, "loss": 0.5621, "step": 1262, "task_loss": 0.7125911116600037 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5137615203857422, "epoch": 1.07, "learning_rate": 4.962430731661501e-05, "loss": 0.6575, "step": 1263, "task_loss": 0.68035888671875 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.8028764724731445, "epoch": 1.07, "learning_rate": 4.96196111580727e-05, "loss": 0.8932, "step": 1264, "task_loss": 1.5125782489776611 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.627933144569397, "epoch": 1.07, "learning_rate": 4.961491499953039e-05, "loss": 0.7321, "step": 1265, "task_loss": 0.8223745226860046 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.7281454205513, "epoch": 1.07, "learning_rate": 4.961021884098808e-05, "loss": 0.7866, "step": 1266, "task_loss": 0.7353380918502808 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.7552920579910278, "epoch": 1.07, "learning_rate": 4.9605522682445757e-05, "loss": 0.783, "step": 1267, "task_loss": 1.9622963666915894 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.805246889591217, "epoch": 1.07, "learning_rate": 4.960082652390345e-05, "loss": 0.7606, "step": 1268, "task_loss": 0.6831367015838623 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5910681486129761, "epoch": 1.07, "learning_rate": 4.9596130365361136e-05, "loss": 0.7526, "step": 1269, "task_loss": 0.5576522946357727 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.869104266166687, "epoch": 1.07, "learning_rate": 4.959143420681883e-05, "loss": 0.8097, "step": 1270, "task_loss": 1.334325909614563 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.3867793083190918, "epoch": 1.07, "learning_rate": 4.958673804827651e-05, "loss": 0.9968, "step": 1271, "task_loss": 0.39703118801116943 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.7016822099685669, "epoch": 1.08, "learning_rate": 4.95820418897342e-05, "loss": 0.6096, "step": 1272, "task_loss": 0.7889289855957031 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4595268964767456, "epoch": 1.08, "learning_rate": 4.957734573119189e-05, "loss": 0.7103, "step": 1273, "task_loss": 0.9168697595596313 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5436785221099854, "epoch": 1.08, "learning_rate": 4.9572649572649575e-05, "loss": 0.7123, "step": 1274, "task_loss": 0.8932439088821411 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.925026535987854, "epoch": 1.08, "learning_rate": 4.956795341410726e-05, "loss": 0.8667, "step": 1275, "task_loss": 0.9106972813606262 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6150174140930176, "epoch": 1.08, "learning_rate": 4.956325725556495e-05, "loss": 0.7665, "step": 1276, "task_loss": 0.6815412640571594 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.7189949154853821, "epoch": 1.08, "learning_rate": 4.955856109702264e-05, "loss": 0.782, "step": 1277, "task_loss": 0.5579949617385864 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.945213794708252, "epoch": 1.08, "learning_rate": 4.955386493848033e-05, "loss": 0.8507, "step": 1278, "task_loss": 1.20814049243927 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5198201537132263, "epoch": 1.08, "learning_rate": 4.954916877993801e-05, "loss": 0.6254, "step": 1279, "task_loss": 0.13831321895122528 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.9499718546867371, "epoch": 1.08, "learning_rate": 4.95444726213957e-05, "loss": 0.7919, "step": 1280, "task_loss": 1.004666805267334 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.9411555528640747, "epoch": 1.08, "learning_rate": 4.9539776462853386e-05, "loss": 0.7531, "step": 1281, "task_loss": 1.5318126678466797 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.9158565998077393, "epoch": 1.08, "learning_rate": 4.953508030431108e-05, "loss": 0.712, "step": 1282, "task_loss": 1.5019237995147705 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.7218375205993652, "epoch": 1.08, "learning_rate": 4.9530384145768765e-05, "loss": 0.7939, "step": 1283, "task_loss": 1.0321540832519531 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6734896302223206, "epoch": 1.09, "learning_rate": 4.952568798722645e-05, "loss": 0.6805, "step": 1284, "task_loss": 0.22773271799087524 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.49604663252830505, "epoch": 1.09, "learning_rate": 4.952099182868414e-05, "loss": 0.5015, "step": 1285, "task_loss": 0.4866641163825989 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5691288709640503, "epoch": 1.09, "learning_rate": 4.9516295670141824e-05, "loss": 0.7545, "step": 1286, "task_loss": 0.5059365630149841 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.37697482109069824, "epoch": 1.09, "learning_rate": 4.951159951159952e-05, "loss": 0.6921, "step": 1287, "task_loss": 0.07297901064157486 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6461402177810669, "epoch": 1.09, "learning_rate": 4.95069033530572e-05, "loss": 0.687, "step": 1288, "task_loss": 0.5798022747039795 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5700920820236206, "epoch": 1.09, "learning_rate": 4.950220719451489e-05, "loss": 0.7963, "step": 1289, "task_loss": 0.028102673590183258 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.7761542797088623, "epoch": 1.09, "learning_rate": 4.9497511035972576e-05, "loss": 0.7498, "step": 1290, "task_loss": 0.7680091857910156 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.7093247175216675, "epoch": 1.09, "learning_rate": 4.949281487743026e-05, "loss": 0.8072, "step": 1291, "task_loss": 0.9535436034202576 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.7288438081741333, "epoch": 1.09, "learning_rate": 4.9488118718887956e-05, "loss": 0.8115, "step": 1292, "task_loss": 1.1577751636505127 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.8672157526016235, "epoch": 1.09, "learning_rate": 4.9483422560345635e-05, "loss": 0.6668, "step": 1293, "task_loss": 0.5670070052146912 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.46113306283950806, "epoch": 1.09, "learning_rate": 4.947872640180333e-05, "loss": 0.5732, "step": 1294, "task_loss": 0.48061829805374146 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5109890699386597, "epoch": 1.09, "learning_rate": 4.9474030243261015e-05, "loss": 0.5816, "step": 1295, "task_loss": 0.6218225955963135 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.7340753078460693, "epoch": 1.1, "learning_rate": 4.946933408471871e-05, "loss": 0.6715, "step": 1296, "task_loss": 0.11878528445959091 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5694925785064697, "epoch": 1.1, "learning_rate": 4.946463792617639e-05, "loss": 0.5517, "step": 1297, "task_loss": 0.3058575391769409 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.43904221057891846, "epoch": 1.1, "learning_rate": 4.9459941767634074e-05, "loss": 0.4085, "step": 1298, "task_loss": 0.48544132709503174 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.1530795097351074, "epoch": 1.1, "learning_rate": 4.945524560909177e-05, "loss": 0.7667, "step": 1299, "task_loss": 1.3456939458847046 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5567535161972046, "epoch": 1.1, "learning_rate": 4.945054945054945e-05, "loss": 0.5951, "step": 1300, "task_loss": 0.15451423823833466 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6514447331428528, "epoch": 1.1, "learning_rate": 4.944585329200714e-05, "loss": 0.6861, "step": 1301, "task_loss": 1.0917481184005737 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6338200569152832, "epoch": 1.1, "learning_rate": 4.9441157133464826e-05, "loss": 0.6894, "step": 1302, "task_loss": 0.9866353273391724 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.83306884765625, "epoch": 1.1, "learning_rate": 4.943646097492252e-05, "loss": 0.6316, "step": 1303, "task_loss": 0.9079781770706177 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6206182837486267, "epoch": 1.1, "learning_rate": 4.9431764816380205e-05, "loss": 0.6452, "step": 1304, "task_loss": 1.4697750806808472 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4492408037185669, "epoch": 1.1, "learning_rate": 4.942706865783789e-05, "loss": 0.7952, "step": 1305, "task_loss": 0.5300272703170776 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6468190550804138, "epoch": 1.1, "learning_rate": 4.942237249929558e-05, "loss": 0.668, "step": 1306, "task_loss": 0.9608815312385559 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6154900193214417, "epoch": 1.1, "learning_rate": 4.9417676340753264e-05, "loss": 0.6545, "step": 1307, "task_loss": 0.24266399443149567 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6244776248931885, "epoch": 1.11, "learning_rate": 4.941298018221096e-05, "loss": 0.7551, "step": 1308, "task_loss": 0.7449018955230713 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.8633492588996887, "epoch": 1.11, "learning_rate": 4.9408284023668644e-05, "loss": 0.6974, "step": 1309, "task_loss": 0.9308632612228394 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6614588499069214, "epoch": 1.11, "learning_rate": 4.940358786512633e-05, "loss": 0.7627, "step": 1310, "task_loss": 1.6496520042419434 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5786677598953247, "epoch": 1.11, "learning_rate": 4.9398891706584017e-05, "loss": 0.7021, "step": 1311, "task_loss": 0.5184558033943176 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.36565640568733215, "epoch": 1.11, "learning_rate": 4.93941955480417e-05, "loss": 0.5384, "step": 1312, "task_loss": 0.19317564368247986 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.8879238367080688, "epoch": 1.11, "learning_rate": 4.9389499389499396e-05, "loss": 0.7162, "step": 1313, "task_loss": 1.733871579170227 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.649004340171814, "epoch": 1.11, "learning_rate": 4.9384803230957076e-05, "loss": 0.6792, "step": 1314, "task_loss": 1.2544273138046265 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4181835949420929, "epoch": 1.11, "learning_rate": 4.938010707241477e-05, "loss": 0.5004, "step": 1315, "task_loss": 0.5713106393814087 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6857585906982422, "epoch": 1.11, "learning_rate": 4.9375410913872455e-05, "loss": 0.5674, "step": 1316, "task_loss": 0.6371403336524963 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.8400878310203552, "epoch": 1.11, "learning_rate": 4.937071475533014e-05, "loss": 0.7457, "step": 1317, "task_loss": 0.8803353905677795 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6222090721130371, "epoch": 1.11, "learning_rate": 4.936601859678783e-05, "loss": 0.6496, "step": 1318, "task_loss": 0.8294748067855835 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.8189681172370911, "epoch": 1.11, "learning_rate": 4.9361322438245514e-05, "loss": 0.5536, "step": 1319, "task_loss": 0.7919564843177795 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4970915913581848, "epoch": 1.12, "learning_rate": 4.935662627970321e-05, "loss": 0.7303, "step": 1320, "task_loss": 0.5106635689735413 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.9206844568252563, "epoch": 1.12, "learning_rate": 4.9351930121160893e-05, "loss": 0.6417, "step": 1321, "task_loss": 0.9685536026954651 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6231045722961426, "epoch": 1.12, "learning_rate": 4.934723396261858e-05, "loss": 0.9221, "step": 1322, "task_loss": 1.9884967803955078 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6323938369750977, "epoch": 1.12, "learning_rate": 4.9342537804076266e-05, "loss": 0.6219, "step": 1323, "task_loss": 0.816940188407898 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.775493323802948, "epoch": 1.12, "learning_rate": 4.933784164553395e-05, "loss": 0.7818, "step": 1324, "task_loss": 0.6912090182304382 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6166880130767822, "epoch": 1.12, "learning_rate": 4.9333145486991646e-05, "loss": 0.6944, "step": 1325, "task_loss": 0.27689340710639954 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5524451732635498, "epoch": 1.12, "learning_rate": 4.932844932844933e-05, "loss": 0.631, "step": 1326, "task_loss": 0.5284366011619568 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5834505558013916, "epoch": 1.12, "learning_rate": 4.932375316990702e-05, "loss": 0.8601, "step": 1327, "task_loss": 0.501453697681427 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6221016645431519, "epoch": 1.12, "learning_rate": 4.9319057011364705e-05, "loss": 0.5564, "step": 1328, "task_loss": 0.5623244643211365 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.47824230790138245, "epoch": 1.12, "learning_rate": 4.93143608528224e-05, "loss": 0.8025, "step": 1329, "task_loss": 0.16670876741409302 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.779980480670929, "epoch": 1.12, "learning_rate": 4.9309664694280084e-05, "loss": 0.8125, "step": 1330, "task_loss": 1.2094398736953735 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6504977941513062, "epoch": 1.13, "learning_rate": 4.9304968535737764e-05, "loss": 0.6747, "step": 1331, "task_loss": 1.3107285499572754 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4757450520992279, "epoch": 1.13, "learning_rate": 4.930027237719546e-05, "loss": 0.7751, "step": 1332, "task_loss": 0.765080988407135 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.41826513409614563, "epoch": 1.13, "learning_rate": 4.929557621865314e-05, "loss": 0.5976, "step": 1333, "task_loss": 0.34595662355422974 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4094617962837219, "epoch": 1.13, "learning_rate": 4.9290880060110836e-05, "loss": 0.6009, "step": 1334, "task_loss": 0.730431079864502 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.8176749348640442, "epoch": 1.13, "learning_rate": 4.9286183901568516e-05, "loss": 0.8557, "step": 1335, "task_loss": 1.1459325551986694 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.8718534111976624, "epoch": 1.13, "learning_rate": 4.928148774302621e-05, "loss": 0.9376, "step": 1336, "task_loss": 1.2973120212554932 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3558146357536316, "epoch": 1.13, "learning_rate": 4.9276791584483895e-05, "loss": 0.8362, "step": 1337, "task_loss": 0.5811787247657776 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.0423654317855835, "epoch": 1.13, "learning_rate": 4.927209542594158e-05, "loss": 0.7964, "step": 1338, "task_loss": 1.681256651878357 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.7893009781837463, "epoch": 1.13, "learning_rate": 4.9267399267399275e-05, "loss": 0.6467, "step": 1339, "task_loss": 1.0758448839187622 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.8247897624969482, "epoch": 1.13, "learning_rate": 4.9262703108856954e-05, "loss": 0.7105, "step": 1340, "task_loss": 1.522544503211975 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6625708341598511, "epoch": 1.13, "learning_rate": 4.925800695031465e-05, "loss": 0.9045, "step": 1341, "task_loss": 0.3208436071872711 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.46288660168647766, "epoch": 1.13, "learning_rate": 4.9253310791772334e-05, "loss": 0.7967, "step": 1342, "task_loss": 0.40859881043434143 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4174606204032898, "epoch": 1.14, "learning_rate": 4.924861463323002e-05, "loss": 0.5645, "step": 1343, "task_loss": 0.18321073055267334 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.1893832683563232, "epoch": 1.14, "learning_rate": 4.9243918474687706e-05, "loss": 0.823, "step": 1344, "task_loss": 1.0280473232269287 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3973398208618164, "epoch": 1.14, "learning_rate": 4.923922231614539e-05, "loss": 0.5911, "step": 1345, "task_loss": 0.6870942115783691 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6313991546630859, "epoch": 1.14, "learning_rate": 4.9234526157603086e-05, "loss": 0.6875, "step": 1346, "task_loss": 0.9727690815925598 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6980172395706177, "epoch": 1.14, "learning_rate": 4.922982999906077e-05, "loss": 0.7374, "step": 1347, "task_loss": 0.5002503991127014 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5147057175636292, "epoch": 1.14, "learning_rate": 4.922513384051846e-05, "loss": 0.814, "step": 1348, "task_loss": 0.9428136348724365 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3389057517051697, "epoch": 1.14, "learning_rate": 4.9220437681976145e-05, "loss": 0.5903, "step": 1349, "task_loss": 0.5447511672973633 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6059243083000183, "epoch": 1.14, "learning_rate": 4.921574152343383e-05, "loss": 0.609, "step": 1350, "task_loss": 1.8156611919403076 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.41145578026771545, "epoch": 1.14, "learning_rate": 4.9211045364891524e-05, "loss": 0.728, "step": 1351, "task_loss": 0.9913026094436646 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6190500259399414, "epoch": 1.14, "learning_rate": 4.9206349206349204e-05, "loss": 0.7839, "step": 1352, "task_loss": 0.978970468044281 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.8000482320785522, "epoch": 1.14, "learning_rate": 4.92016530478069e-05, "loss": 0.7777, "step": 1353, "task_loss": 0.5483626127243042 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.38977473974227905, "epoch": 1.14, "learning_rate": 4.919695688926458e-05, "loss": 0.581, "step": 1354, "task_loss": 0.6048043370246887 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5673363208770752, "epoch": 1.15, "learning_rate": 4.919226073072227e-05, "loss": 0.7416, "step": 1355, "task_loss": 0.5679433941841125 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.9642918109893799, "epoch": 1.15, "learning_rate": 4.918756457217996e-05, "loss": 0.8678, "step": 1356, "task_loss": 0.20047631859779358 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3476962447166443, "epoch": 1.15, "learning_rate": 4.918286841363764e-05, "loss": 0.8088, "step": 1357, "task_loss": 0.24324047565460205 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.9156034588813782, "epoch": 1.15, "learning_rate": 4.9178172255095335e-05, "loss": 0.8393, "step": 1358, "task_loss": 1.9249433279037476 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.763570249080658, "epoch": 1.15, "learning_rate": 4.917347609655302e-05, "loss": 0.6236, "step": 1359, "task_loss": 0.23107849061489105 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.9968781471252441, "epoch": 1.15, "learning_rate": 4.9168779938010715e-05, "loss": 0.7562, "step": 1360, "task_loss": 0.545785665512085 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6500983238220215, "epoch": 1.15, "learning_rate": 4.9164083779468394e-05, "loss": 0.7808, "step": 1361, "task_loss": 0.6783839464187622 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4511190354824066, "epoch": 1.15, "learning_rate": 4.915938762092608e-05, "loss": 0.6541, "step": 1362, "task_loss": 0.5171618461608887 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4862748086452484, "epoch": 1.15, "learning_rate": 4.9154691462383774e-05, "loss": 0.8317, "step": 1363, "task_loss": 0.8363558053970337 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.40697258710861206, "epoch": 1.15, "learning_rate": 4.914999530384146e-05, "loss": 0.6001, "step": 1364, "task_loss": 0.5757407546043396 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.42105549573898315, "epoch": 1.15, "learning_rate": 4.9145299145299147e-05, "loss": 0.6072, "step": 1365, "task_loss": 1.1838321685791016 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.9189705848693848, "epoch": 1.15, "learning_rate": 4.914060298675683e-05, "loss": 0.7744, "step": 1366, "task_loss": 1.1158034801483154 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5393441915512085, "epoch": 1.16, "learning_rate": 4.9135906828214526e-05, "loss": 0.5768, "step": 1367, "task_loss": 0.4702691435813904 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.0310040712356567, "epoch": 1.16, "learning_rate": 4.913121066967221e-05, "loss": 0.7056, "step": 1368, "task_loss": 1.0094150304794312 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.38820499181747437, "epoch": 1.16, "learning_rate": 4.91265145111299e-05, "loss": 0.7687, "step": 1369, "task_loss": 0.5620743036270142 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5300425291061401, "epoch": 1.16, "learning_rate": 4.9121818352587585e-05, "loss": 0.5418, "step": 1370, "task_loss": 0.9790509343147278 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6569465398788452, "epoch": 1.16, "learning_rate": 4.911712219404527e-05, "loss": 0.5937, "step": 1371, "task_loss": 1.4016033411026 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6341307163238525, "epoch": 1.16, "learning_rate": 4.9112426035502965e-05, "loss": 0.6251, "step": 1372, "task_loss": 0.5100233554840088 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.8187536597251892, "epoch": 1.16, "learning_rate": 4.910772987696065e-05, "loss": 0.77, "step": 1373, "task_loss": 0.9382261633872986 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.7493864297866821, "epoch": 1.16, "learning_rate": 4.910303371841834e-05, "loss": 0.8435, "step": 1374, "task_loss": 1.4655905961990356 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5396281480789185, "epoch": 1.16, "learning_rate": 4.9098337559876024e-05, "loss": 0.76, "step": 1375, "task_loss": 1.3066457509994507 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.7906984090805054, "epoch": 1.16, "learning_rate": 4.909364140133371e-05, "loss": 0.739, "step": 1376, "task_loss": 0.7647501826286316 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.7042509317398071, "epoch": 1.16, "learning_rate": 4.90889452427914e-05, "loss": 0.7252, "step": 1377, "task_loss": 1.0582749843597412 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.7503671646118164, "epoch": 1.16, "learning_rate": 4.908424908424908e-05, "loss": 0.7909, "step": 1378, "task_loss": 0.24215669929981232 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5873064994812012, "epoch": 1.17, "learning_rate": 4.9079552925706776e-05, "loss": 0.6132, "step": 1379, "task_loss": 0.7812985181808472 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.8959978818893433, "epoch": 1.17, "learning_rate": 4.907485676716446e-05, "loss": 0.7086, "step": 1380, "task_loss": 0.7291380167007446 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.0126283168792725, "epoch": 1.17, "learning_rate": 4.907016060862215e-05, "loss": 0.7556, "step": 1381, "task_loss": 1.4942545890808105 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.8307521343231201, "epoch": 1.17, "learning_rate": 4.9065464450079835e-05, "loss": 0.6981, "step": 1382, "task_loss": 0.6868078112602234 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.634753406047821, "epoch": 1.17, "learning_rate": 4.906076829153752e-05, "loss": 0.6839, "step": 1383, "task_loss": 2.4826629161834717 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.9333901405334473, "epoch": 1.17, "learning_rate": 4.9056072132995214e-05, "loss": 0.6132, "step": 1384, "task_loss": 0.7142943143844604 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6435319185256958, "epoch": 1.17, "learning_rate": 4.90513759744529e-05, "loss": 0.6868, "step": 1385, "task_loss": 0.8403874635696411 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6717526912689209, "epoch": 1.17, "learning_rate": 4.904667981591059e-05, "loss": 0.6066, "step": 1386, "task_loss": 0.6957762837409973 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3425019681453705, "epoch": 1.17, "learning_rate": 4.904198365736827e-05, "loss": 0.6661, "step": 1387, "task_loss": 0.25099050998687744 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6465728878974915, "epoch": 1.17, "learning_rate": 4.903728749882596e-05, "loss": 0.6325, "step": 1388, "task_loss": 0.41266924142837524 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6118613481521606, "epoch": 1.17, "learning_rate": 4.903259134028365e-05, "loss": 0.6334, "step": 1389, "task_loss": 1.2684624195098877 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.7302461862564087, "epoch": 1.17, "learning_rate": 4.902789518174134e-05, "loss": 0.7814, "step": 1390, "task_loss": 0.7224894762039185 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5329036712646484, "epoch": 1.18, "learning_rate": 4.9023199023199025e-05, "loss": 0.6941, "step": 1391, "task_loss": 0.7438899874687195 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.828599214553833, "epoch": 1.18, "learning_rate": 4.901850286465671e-05, "loss": 0.8444, "step": 1392, "task_loss": 0.4139252007007599 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6210654377937317, "epoch": 1.18, "learning_rate": 4.90138067061144e-05, "loss": 0.903, "step": 1393, "task_loss": 1.1307477951049805 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5672077536582947, "epoch": 1.18, "learning_rate": 4.900911054757209e-05, "loss": 0.7461, "step": 1394, "task_loss": 1.317976474761963 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.5671592950820923, "epoch": 1.18, "learning_rate": 4.900441438902977e-05, "loss": 0.8757, "step": 1395, "task_loss": 0.8834158778190613 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.0163724422454834, "epoch": 1.18, "learning_rate": 4.8999718230487464e-05, "loss": 0.7693, "step": 1396, "task_loss": 1.3023768663406372 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5705899596214294, "epoch": 1.18, "learning_rate": 4.899502207194515e-05, "loss": 0.7207, "step": 1397, "task_loss": 1.1483713388442993 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.7187027931213379, "epoch": 1.18, "learning_rate": 4.899032591340284e-05, "loss": 0.672, "step": 1398, "task_loss": 0.4588943421840668 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.7078155875205994, "epoch": 1.18, "learning_rate": 4.898562975486053e-05, "loss": 0.6821, "step": 1399, "task_loss": 1.7584826946258545 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.7822513580322266, "epoch": 1.18, "learning_rate": 4.898093359631821e-05, "loss": 0.6203, "step": 1400, "task_loss": 0.9964765906333923 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6452760100364685, "epoch": 1.18, "learning_rate": 4.89762374377759e-05, "loss": 0.8515, "step": 1401, "task_loss": 1.3357552289962769 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.82828688621521, "epoch": 1.19, "learning_rate": 4.897154127923359e-05, "loss": 0.55, "step": 1402, "task_loss": 1.0566030740737915 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6824697256088257, "epoch": 1.19, "learning_rate": 4.896684512069128e-05, "loss": 0.5429, "step": 1403, "task_loss": 1.867529273033142 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5981334447860718, "epoch": 1.19, "learning_rate": 4.896214896214896e-05, "loss": 0.5964, "step": 1404, "task_loss": 0.8120792508125305 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4322366714477539, "epoch": 1.19, "learning_rate": 4.8957452803606654e-05, "loss": 0.6743, "step": 1405, "task_loss": 0.9135465621948242 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6989839673042297, "epoch": 1.19, "learning_rate": 4.895275664506434e-05, "loss": 0.6595, "step": 1406, "task_loss": 1.323115348815918 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.7032515406608582, "epoch": 1.19, "learning_rate": 4.894806048652203e-05, "loss": 0.8113, "step": 1407, "task_loss": 1.4239848852157593 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5719220042228699, "epoch": 1.19, "learning_rate": 4.8943364327979713e-05, "loss": 0.6589, "step": 1408, "task_loss": 1.262266993522644 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.8297879695892334, "epoch": 1.19, "learning_rate": 4.89386681694374e-05, "loss": 0.7681, "step": 1409, "task_loss": 1.1444751024246216 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.8285918235778809, "epoch": 1.19, "learning_rate": 4.893397201089509e-05, "loss": 0.6378, "step": 1410, "task_loss": 1.5723944902420044 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.3582594394683838, "epoch": 1.19, "learning_rate": 4.892927585235278e-05, "loss": 1.0444, "step": 1411, "task_loss": 1.5388820171356201 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.49790093302726746, "epoch": 1.19, "learning_rate": 4.8924579693810466e-05, "loss": 0.741, "step": 1412, "task_loss": 1.040041208267212 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.8510262966156006, "epoch": 1.19, "learning_rate": 4.891988353526815e-05, "loss": 0.5667, "step": 1413, "task_loss": 0.9218040108680725 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.7334065437316895, "epoch": 1.2, "learning_rate": 4.891518737672584e-05, "loss": 0.6119, "step": 1414, "task_loss": 1.0949002504348755 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6995035409927368, "epoch": 1.2, "learning_rate": 4.891049121818353e-05, "loss": 0.8343, "step": 1415, "task_loss": 1.597287893295288 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5928952693939209, "epoch": 1.2, "learning_rate": 4.890579505964122e-05, "loss": 0.6578, "step": 1416, "task_loss": 1.0109384059906006 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.9946739077568054, "epoch": 1.2, "learning_rate": 4.8901098901098904e-05, "loss": 0.7741, "step": 1417, "task_loss": 1.490555763244629 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6486961841583252, "epoch": 1.2, "learning_rate": 4.889640274255659e-05, "loss": 0.7022, "step": 1418, "task_loss": 1.1189180612564087 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.46602484583854675, "epoch": 1.2, "learning_rate": 4.889170658401428e-05, "loss": 0.8156, "step": 1419, "task_loss": 0.8417402505874634 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4847773611545563, "epoch": 1.2, "learning_rate": 4.888701042547197e-05, "loss": 0.6477, "step": 1420, "task_loss": 0.04592149704694748 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2685670256614685, "epoch": 1.2, "learning_rate": 4.888231426692965e-05, "loss": 0.4722, "step": 1421, "task_loss": 0.040146518498659134 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.1056642532348633, "epoch": 1.2, "learning_rate": 4.887761810838734e-05, "loss": 0.777, "step": 1422, "task_loss": 1.6640281677246094 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4153333604335785, "epoch": 1.2, "learning_rate": 4.887292194984503e-05, "loss": 0.6116, "step": 1423, "task_loss": 0.621565580368042 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6438086628913879, "epoch": 1.2, "learning_rate": 4.886822579130272e-05, "loss": 0.6486, "step": 1424, "task_loss": 1.165236473083496 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.8604700565338135, "epoch": 1.2, "learning_rate": 4.88635296327604e-05, "loss": 0.8168, "step": 1425, "task_loss": 1.1765363216400146 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.46633028984069824, "epoch": 1.21, "learning_rate": 4.885883347421809e-05, "loss": 0.4986, "step": 1426, "task_loss": 1.0633851289749146 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.8429061770439148, "epoch": 1.21, "learning_rate": 4.885413731567578e-05, "loss": 0.7725, "step": 1427, "task_loss": 1.5832393169403076 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.8090022802352905, "epoch": 1.21, "learning_rate": 4.884944115713347e-05, "loss": 0.7922, "step": 1428, "task_loss": 1.664628267288208 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.8478543758392334, "epoch": 1.21, "learning_rate": 4.8844744998591154e-05, "loss": 0.7248, "step": 1429, "task_loss": 1.1572437286376953 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.7138170599937439, "epoch": 1.21, "learning_rate": 4.884004884004884e-05, "loss": 0.6401, "step": 1430, "task_loss": 0.8682454824447632 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5325661897659302, "epoch": 1.21, "learning_rate": 4.883535268150653e-05, "loss": 0.6, "step": 1431, "task_loss": 0.8485495448112488 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.207361102104187, "epoch": 1.21, "learning_rate": 4.883065652296422e-05, "loss": 0.8543, "step": 1432, "task_loss": 1.496865153312683 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3847033381462097, "epoch": 1.21, "learning_rate": 4.8825960364421906e-05, "loss": 0.6112, "step": 1433, "task_loss": 0.16500955820083618 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5243787169456482, "epoch": 1.21, "learning_rate": 4.882126420587959e-05, "loss": 0.6966, "step": 1434, "task_loss": 0.7289472818374634 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.36489444971084595, "epoch": 1.21, "learning_rate": 4.881656804733728e-05, "loss": 0.5224, "step": 1435, "task_loss": 0.3598242998123169 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.929207980632782, "epoch": 1.21, "learning_rate": 4.881187188879497e-05, "loss": 0.65, "step": 1436, "task_loss": 0.89776211977005 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.7676011323928833, "epoch": 1.21, "learning_rate": 4.880717573025266e-05, "loss": 0.5714, "step": 1437, "task_loss": 0.7398805618286133 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6778333187103271, "epoch": 1.22, "learning_rate": 4.8802479571710344e-05, "loss": 0.696, "step": 1438, "task_loss": 0.5839777588844299 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.5627596378326416, "epoch": 1.22, "learning_rate": 4.879778341316803e-05, "loss": 1.0132, "step": 1439, "task_loss": 1.677577018737793 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.7751020193099976, "epoch": 1.22, "learning_rate": 4.879308725462572e-05, "loss": 0.6619, "step": 1440, "task_loss": 0.3062921166419983 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.0167386531829834, "epoch": 1.22, "learning_rate": 4.878839109608341e-05, "loss": 0.8926, "step": 1441, "task_loss": 0.772346556186676 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6314076781272888, "epoch": 1.22, "learning_rate": 4.878369493754109e-05, "loss": 0.5538, "step": 1442, "task_loss": 0.8512011170387268 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4654419422149658, "epoch": 1.22, "learning_rate": 4.877899877899878e-05, "loss": 0.6992, "step": 1443, "task_loss": 0.6515836715698242 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.35642409324645996, "epoch": 1.22, "learning_rate": 4.877430262045647e-05, "loss": 0.493, "step": 1444, "task_loss": 0.24259091913700104 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6541587710380554, "epoch": 1.22, "learning_rate": 4.8769606461914155e-05, "loss": 0.771, "step": 1445, "task_loss": 0.4378105401992798 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.0534685850143433, "epoch": 1.22, "learning_rate": 4.876491030337185e-05, "loss": 0.7387, "step": 1446, "task_loss": 1.2129827737808228 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.2433269023895264, "epoch": 1.22, "learning_rate": 4.876021414482953e-05, "loss": 0.8619, "step": 1447, "task_loss": 1.5188193321228027 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.757678210735321, "epoch": 1.22, "learning_rate": 4.875551798628722e-05, "loss": 0.5173, "step": 1448, "task_loss": 0.6181909441947937 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5193491578102112, "epoch": 1.22, "learning_rate": 4.875082182774491e-05, "loss": 0.5844, "step": 1449, "task_loss": 0.29249513149261475 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.8145077228546143, "epoch": 1.23, "learning_rate": 4.8746125669202594e-05, "loss": 0.7726, "step": 1450, "task_loss": 1.3930768966674805 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.594058632850647, "epoch": 1.23, "learning_rate": 4.874142951066028e-05, "loss": 0.6594, "step": 1451, "task_loss": 0.8744868040084839 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.9866927862167358, "epoch": 1.23, "learning_rate": 4.8736733352117967e-05, "loss": 0.6829, "step": 1452, "task_loss": 2.104647159576416 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5556595325469971, "epoch": 1.23, "learning_rate": 4.873203719357566e-05, "loss": 0.7088, "step": 1453, "task_loss": 0.4150608777999878 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4429343342781067, "epoch": 1.23, "learning_rate": 4.8727341035033346e-05, "loss": 0.5612, "step": 1454, "task_loss": 0.7755537033081055 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.49817875027656555, "epoch": 1.23, "learning_rate": 4.872264487649103e-05, "loss": 0.5209, "step": 1455, "task_loss": 0.7862489819526672 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3651057481765747, "epoch": 1.23, "learning_rate": 4.871794871794872e-05, "loss": 0.8477, "step": 1456, "task_loss": 0.3501637279987335 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5044481754302979, "epoch": 1.23, "learning_rate": 4.8713252559406405e-05, "loss": 0.7084, "step": 1457, "task_loss": 0.41368645429611206 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.7046396732330322, "epoch": 1.23, "learning_rate": 4.87085564008641e-05, "loss": 0.8082, "step": 1458, "task_loss": 1.2316187620162964 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.616681694984436, "epoch": 1.23, "learning_rate": 4.870386024232178e-05, "loss": 0.7432, "step": 1459, "task_loss": 0.7891397476196289 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6617809534072876, "epoch": 1.23, "learning_rate": 4.869916408377947e-05, "loss": 0.7505, "step": 1460, "task_loss": 2.1454050540924072 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.7211824059486389, "epoch": 1.23, "learning_rate": 4.869446792523716e-05, "loss": 0.6755, "step": 1461, "task_loss": 1.1156843900680542 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6543716192245483, "epoch": 1.24, "learning_rate": 4.868977176669485e-05, "loss": 0.7326, "step": 1462, "task_loss": 0.4479277729988098 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.49115681648254395, "epoch": 1.24, "learning_rate": 4.868507560815254e-05, "loss": 0.6575, "step": 1463, "task_loss": 0.5252255797386169 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.47477197647094727, "epoch": 1.24, "learning_rate": 4.8680379449610216e-05, "loss": 0.623, "step": 1464, "task_loss": 1.4158002138137817 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5702205896377563, "epoch": 1.24, "learning_rate": 4.867568329106791e-05, "loss": 0.6556, "step": 1465, "task_loss": 0.9019696116447449 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.468389630317688, "epoch": 1.24, "learning_rate": 4.8670987132525596e-05, "loss": 0.5573, "step": 1466, "task_loss": 0.652245044708252 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4848339855670929, "epoch": 1.24, "learning_rate": 4.866629097398329e-05, "loss": 0.6065, "step": 1467, "task_loss": 0.4609755277633667 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6450019478797913, "epoch": 1.24, "learning_rate": 4.866159481544097e-05, "loss": 0.6613, "step": 1468, "task_loss": 1.2018921375274658 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.3709689378738403, "epoch": 1.24, "learning_rate": 4.865689865689866e-05, "loss": 0.7391, "step": 1469, "task_loss": 1.4850081205368042 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5193872451782227, "epoch": 1.24, "learning_rate": 4.865220249835635e-05, "loss": 0.5853, "step": 1470, "task_loss": 0.6702150106430054 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.547351062297821, "epoch": 1.24, "learning_rate": 4.8647506339814034e-05, "loss": 0.5512, "step": 1471, "task_loss": 0.9600668549537659 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6322142481803894, "epoch": 1.24, "learning_rate": 4.864281018127172e-05, "loss": 0.6738, "step": 1472, "task_loss": 0.6918298006057739 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.7747842073440552, "epoch": 1.24, "learning_rate": 4.863811402272941e-05, "loss": 0.6032, "step": 1473, "task_loss": 0.6017622947692871 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6447668075561523, "epoch": 1.25, "learning_rate": 4.86334178641871e-05, "loss": 0.574, "step": 1474, "task_loss": 0.41675034165382385 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4318716526031494, "epoch": 1.25, "learning_rate": 4.8628721705644786e-05, "loss": 0.572, "step": 1475, "task_loss": 1.0527219772338867 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.676533043384552, "epoch": 1.25, "learning_rate": 4.862402554710247e-05, "loss": 0.5477, "step": 1476, "task_loss": 1.4153600931167603 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6107629537582397, "epoch": 1.25, "learning_rate": 4.861932938856016e-05, "loss": 0.749, "step": 1477, "task_loss": 0.7063106298446655 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.48590511083602905, "epoch": 1.25, "learning_rate": 4.8614633230017845e-05, "loss": 0.5163, "step": 1478, "task_loss": 0.8698598742485046 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.42161017656326294, "epoch": 1.25, "learning_rate": 4.860993707147554e-05, "loss": 0.5406, "step": 1479, "task_loss": 0.6526816487312317 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.36456745862960815, "epoch": 1.25, "learning_rate": 4.8605240912933225e-05, "loss": 0.53, "step": 1480, "task_loss": 0.6898282766342163 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.9475955963134766, "epoch": 1.25, "learning_rate": 4.860054475439091e-05, "loss": 0.616, "step": 1481, "task_loss": 1.2943830490112305 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5560633540153503, "epoch": 1.25, "learning_rate": 4.85958485958486e-05, "loss": 0.637, "step": 1482, "task_loss": 1.953587532043457 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5974971652030945, "epoch": 1.25, "learning_rate": 4.8591152437306284e-05, "loss": 0.8936, "step": 1483, "task_loss": 1.8227041959762573 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6112303733825684, "epoch": 1.25, "learning_rate": 4.858645627876398e-05, "loss": 0.6685, "step": 1484, "task_loss": 0.3551182150840759 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5870187878608704, "epoch": 1.26, "learning_rate": 4.8581760120221656e-05, "loss": 0.8833, "step": 1485, "task_loss": 0.7022114992141724 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.33385583758354187, "epoch": 1.26, "learning_rate": 4.857706396167935e-05, "loss": 0.5457, "step": 1486, "task_loss": 0.8311796188354492 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.37253236770629883, "epoch": 1.26, "learning_rate": 4.8572367803137036e-05, "loss": 0.4951, "step": 1487, "task_loss": 0.46015480160713196 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6860008835792542, "epoch": 1.26, "learning_rate": 4.856767164459472e-05, "loss": 0.5868, "step": 1488, "task_loss": 0.6789003014564514 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.44354456663131714, "epoch": 1.26, "learning_rate": 4.856297548605241e-05, "loss": 0.6505, "step": 1489, "task_loss": 0.6948358416557312 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.7355843186378479, "epoch": 1.26, "learning_rate": 4.8558279327510095e-05, "loss": 0.7142, "step": 1490, "task_loss": 0.5633959770202637 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5250322818756104, "epoch": 1.26, "learning_rate": 4.855358316896779e-05, "loss": 0.631, "step": 1491, "task_loss": 0.6718034744262695 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5688008666038513, "epoch": 1.26, "learning_rate": 4.8548887010425474e-05, "loss": 0.4647, "step": 1492, "task_loss": 1.1614246368408203 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.8129573464393616, "epoch": 1.26, "learning_rate": 4.854419085188317e-05, "loss": 0.6647, "step": 1493, "task_loss": 1.364094614982605 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.9073379039764404, "epoch": 1.26, "learning_rate": 4.853949469334085e-05, "loss": 0.9391, "step": 1494, "task_loss": 0.48791617155075073 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.622846245765686, "epoch": 1.26, "learning_rate": 4.8534798534798533e-05, "loss": 0.657, "step": 1495, "task_loss": 1.2407217025756836 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5800409913063049, "epoch": 1.26, "learning_rate": 4.8530102376256227e-05, "loss": 0.4548, "step": 1496, "task_loss": 0.603920042514801 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.8032512664794922, "epoch": 1.27, "learning_rate": 4.852540621771391e-05, "loss": 0.6589, "step": 1497, "task_loss": 0.5093135833740234 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6750850677490234, "epoch": 1.27, "learning_rate": 4.85207100591716e-05, "loss": 0.5619, "step": 1498, "task_loss": 0.8857743144035339 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.606971263885498, "epoch": 1.27, "learning_rate": 4.8516013900629286e-05, "loss": 0.5495, "step": 1499, "task_loss": 1.0862387418746948 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.8809822797775269, "epoch": 1.27, "learning_rate": 4.851131774208698e-05, "loss": 0.816, "step": 1500, "task_loss": 2.204951286315918 }, { "epoch": 1.27, "eval_accuracy": 0.8954059405940594, "eval_loss": 0.3943859934806824, "eval_runtime": 228.6362, "eval_samples_per_second": 110.437, "eval_steps_per_second": 0.866, "step": 1500 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.31930065155029297, "epoch": 1.27, "learning_rate": 4.8506621583544665e-05, "loss": 0.6523, "step": 1501, "task_loss": 1.2492903470993042 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.7212862968444824, "epoch": 1.27, "learning_rate": 4.850192542500235e-05, "loss": 0.6757, "step": 1502, "task_loss": 0.819610595703125 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2794511318206787, "epoch": 1.27, "learning_rate": 4.849722926646004e-05, "loss": 0.4095, "step": 1503, "task_loss": 1.114783763885498 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.794372022151947, "epoch": 1.27, "learning_rate": 4.8492533107917724e-05, "loss": 0.7523, "step": 1504, "task_loss": 0.6503158807754517 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.9146854877471924, "epoch": 1.27, "learning_rate": 4.848783694937542e-05, "loss": 0.7378, "step": 1505, "task_loss": 0.7169913053512573 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5197542905807495, "epoch": 1.27, "learning_rate": 4.84831407908331e-05, "loss": 0.6416, "step": 1506, "task_loss": 0.8979859948158264 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.35962212085723877, "epoch": 1.27, "learning_rate": 4.847844463229079e-05, "loss": 0.5496, "step": 1507, "task_loss": 0.6904214024543762 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.9617994427680969, "epoch": 1.27, "learning_rate": 4.8473748473748476e-05, "loss": 0.7286, "step": 1508, "task_loss": 1.0132970809936523 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.7497919201850891, "epoch": 1.28, "learning_rate": 4.846905231520616e-05, "loss": 0.669, "step": 1509, "task_loss": 0.29590731859207153 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.49269795417785645, "epoch": 1.28, "learning_rate": 4.8464356156663856e-05, "loss": 0.658, "step": 1510, "task_loss": 1.5304381847381592 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6285421848297119, "epoch": 1.28, "learning_rate": 4.8459659998121535e-05, "loss": 0.715, "step": 1511, "task_loss": 0.7294623255729675 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.7012124061584473, "epoch": 1.28, "learning_rate": 4.845496383957923e-05, "loss": 0.5627, "step": 1512, "task_loss": 0.8408117294311523 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.8051336407661438, "epoch": 1.28, "learning_rate": 4.8450267681036915e-05, "loss": 0.6118, "step": 1513, "task_loss": 0.5096275806427002 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.9508499503135681, "epoch": 1.28, "learning_rate": 4.84455715224946e-05, "loss": 0.7496, "step": 1514, "task_loss": 1.653529405593872 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3358496427536011, "epoch": 1.28, "learning_rate": 4.844087536395229e-05, "loss": 0.6558, "step": 1515, "task_loss": 0.043991342186927795 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.7455700635910034, "epoch": 1.28, "learning_rate": 4.8436179205409974e-05, "loss": 0.6348, "step": 1516, "task_loss": 0.8194367289543152 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.515150785446167, "epoch": 1.28, "learning_rate": 4.843148304686767e-05, "loss": 0.6803, "step": 1517, "task_loss": 1.407773733139038 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4168629050254822, "epoch": 1.28, "learning_rate": 4.842678688832535e-05, "loss": 0.6127, "step": 1518, "task_loss": 0.5214310884475708 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.7923804521560669, "epoch": 1.28, "learning_rate": 4.842209072978304e-05, "loss": 0.8023, "step": 1519, "task_loss": 0.2299679070711136 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.7684105634689331, "epoch": 1.28, "learning_rate": 4.8417394571240726e-05, "loss": 0.7438, "step": 1520, "task_loss": 1.7137370109558105 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.7027855515480042, "epoch": 1.29, "learning_rate": 4.841269841269841e-05, "loss": 0.7348, "step": 1521, "task_loss": 1.4995673894882202 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.0628635883331299, "epoch": 1.29, "learning_rate": 4.8408002254156105e-05, "loss": 0.629, "step": 1522, "task_loss": 1.3018572330474854 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.43159350752830505, "epoch": 1.29, "learning_rate": 4.840330609561379e-05, "loss": 0.5522, "step": 1523, "task_loss": 0.08686469495296478 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.8554230332374573, "epoch": 1.29, "learning_rate": 4.839860993707148e-05, "loss": 0.7022, "step": 1524, "task_loss": 0.9596909284591675 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.728079080581665, "epoch": 1.29, "learning_rate": 4.8393913778529164e-05, "loss": 0.7114, "step": 1525, "task_loss": 1.3223060369491577 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3996451199054718, "epoch": 1.29, "learning_rate": 4.838921761998686e-05, "loss": 0.5716, "step": 1526, "task_loss": 0.09382757544517517 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5382207632064819, "epoch": 1.29, "learning_rate": 4.8384521461444544e-05, "loss": 0.6078, "step": 1527, "task_loss": 0.8313444256782532 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.609110951423645, "epoch": 1.29, "learning_rate": 4.837982530290222e-05, "loss": 0.6282, "step": 1528, "task_loss": 0.4950845539569855 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6743806004524231, "epoch": 1.29, "learning_rate": 4.8375129144359916e-05, "loss": 0.8634, "step": 1529, "task_loss": 1.0592788457870483 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5122683644294739, "epoch": 1.29, "learning_rate": 4.83704329858176e-05, "loss": 0.6045, "step": 1530, "task_loss": 0.5953865051269531 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5554242134094238, "epoch": 1.29, "learning_rate": 4.8365736827275296e-05, "loss": 0.8185, "step": 1531, "task_loss": 0.5615949630737305 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4926005005836487, "epoch": 1.29, "learning_rate": 4.8361040668732975e-05, "loss": 0.5122, "step": 1532, "task_loss": 0.8189513683319092 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5228354930877686, "epoch": 1.3, "learning_rate": 4.835634451019067e-05, "loss": 0.512, "step": 1533, "task_loss": 0.8986006379127502 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.7789825797080994, "epoch": 1.3, "learning_rate": 4.8351648351648355e-05, "loss": 0.6405, "step": 1534, "task_loss": 1.1608827114105225 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4193393588066101, "epoch": 1.3, "learning_rate": 4.834695219310604e-05, "loss": 0.5979, "step": 1535, "task_loss": 0.5649136304855347 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.0244003534317017, "epoch": 1.3, "learning_rate": 4.834225603456373e-05, "loss": 0.7614, "step": 1536, "task_loss": 0.6970483064651489 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6973425149917603, "epoch": 1.3, "learning_rate": 4.8337559876021414e-05, "loss": 0.6144, "step": 1537, "task_loss": 1.1468579769134521 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.953492283821106, "epoch": 1.3, "learning_rate": 4.833286371747911e-05, "loss": 0.5734, "step": 1538, "task_loss": 0.5555250644683838 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5364861488342285, "epoch": 1.3, "learning_rate": 4.832816755893679e-05, "loss": 0.665, "step": 1539, "task_loss": 0.5105794668197632 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.1393046379089355, "epoch": 1.3, "learning_rate": 4.832347140039448e-05, "loss": 0.7321, "step": 1540, "task_loss": 1.580297589302063 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6072041988372803, "epoch": 1.3, "learning_rate": 4.8318775241852166e-05, "loss": 0.5387, "step": 1541, "task_loss": 0.4725264310836792 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.7581283450126648, "epoch": 1.3, "learning_rate": 4.831407908330985e-05, "loss": 0.7075, "step": 1542, "task_loss": 1.1358823776245117 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6209812760353088, "epoch": 1.3, "learning_rate": 4.8309382924767545e-05, "loss": 0.8087, "step": 1543, "task_loss": 0.7734580636024475 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.075265884399414, "epoch": 1.3, "learning_rate": 4.830468676622523e-05, "loss": 0.6882, "step": 1544, "task_loss": 2.2783830165863037 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.656969428062439, "epoch": 1.31, "learning_rate": 4.829999060768292e-05, "loss": 0.5547, "step": 1545, "task_loss": 0.7023230195045471 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.44739753007888794, "epoch": 1.31, "learning_rate": 4.8295294449140604e-05, "loss": 0.5315, "step": 1546, "task_loss": 0.4201110601425171 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5791067481040955, "epoch": 1.31, "learning_rate": 4.829059829059829e-05, "loss": 0.7108, "step": 1547, "task_loss": 0.375360906124115 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.7872514724731445, "epoch": 1.31, "learning_rate": 4.8285902132055984e-05, "loss": 0.5944, "step": 1548, "task_loss": 1.5709960460662842 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.8588241338729858, "epoch": 1.31, "learning_rate": 4.8281205973513664e-05, "loss": 0.6576, "step": 1549, "task_loss": 0.7646786570549011 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.8478990197181702, "epoch": 1.31, "learning_rate": 4.827650981497136e-05, "loss": 0.6539, "step": 1550, "task_loss": 0.6614632606506348 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4807296097278595, "epoch": 1.31, "learning_rate": 4.827181365642904e-05, "loss": 0.5384, "step": 1551, "task_loss": 0.3658294975757599 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6647264957427979, "epoch": 1.31, "learning_rate": 4.826711749788673e-05, "loss": 0.8314, "step": 1552, "task_loss": 0.6389110088348389 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6121876835823059, "epoch": 1.31, "learning_rate": 4.826242133934442e-05, "loss": 0.5632, "step": 1553, "task_loss": 1.105015754699707 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6053259372711182, "epoch": 1.31, "learning_rate": 4.82577251808021e-05, "loss": 0.6133, "step": 1554, "task_loss": 0.7961587309837341 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4095574915409088, "epoch": 1.31, "learning_rate": 4.8253029022259795e-05, "loss": 0.5772, "step": 1555, "task_loss": 0.6977249383926392 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6837360858917236, "epoch": 1.32, "learning_rate": 4.824833286371748e-05, "loss": 0.7049, "step": 1556, "task_loss": 1.1659740209579468 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4575137794017792, "epoch": 1.32, "learning_rate": 4.8243636705175175e-05, "loss": 0.5257, "step": 1557, "task_loss": 0.9253278374671936 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4212902784347534, "epoch": 1.32, "learning_rate": 4.8238940546632854e-05, "loss": 0.702, "step": 1558, "task_loss": 0.20307455956935883 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3127292990684509, "epoch": 1.32, "learning_rate": 4.823424438809054e-05, "loss": 0.5411, "step": 1559, "task_loss": 1.1613383293151855 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.7592552900314331, "epoch": 1.32, "learning_rate": 4.8229548229548234e-05, "loss": 0.7108, "step": 1560, "task_loss": 1.1608394384384155 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.9071587324142456, "epoch": 1.32, "learning_rate": 4.822485207100592e-05, "loss": 0.7645, "step": 1561, "task_loss": 0.8745078444480896 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3181525766849518, "epoch": 1.32, "learning_rate": 4.8220155912463606e-05, "loss": 0.4542, "step": 1562, "task_loss": 0.06940240412950516 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6186635494232178, "epoch": 1.32, "learning_rate": 4.821545975392129e-05, "loss": 0.6455, "step": 1563, "task_loss": 0.578852653503418 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.410347044467926, "epoch": 1.32, "learning_rate": 4.8210763595378986e-05, "loss": 0.6124, "step": 1564, "task_loss": 1.0148606300354004 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.7263593673706055, "epoch": 1.32, "learning_rate": 4.820606743683667e-05, "loss": 0.5382, "step": 1565, "task_loss": 0.4010551869869232 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.9422719478607178, "epoch": 1.32, "learning_rate": 4.820137127829435e-05, "loss": 0.8576, "step": 1566, "task_loss": 0.9096721410751343 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5140295028686523, "epoch": 1.32, "learning_rate": 4.8196675119752045e-05, "loss": 0.6439, "step": 1567, "task_loss": 0.4952224791049957 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.7024980783462524, "epoch": 1.33, "learning_rate": 4.819197896120973e-05, "loss": 0.5758, "step": 1568, "task_loss": 0.7159125208854675 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5138143301010132, "epoch": 1.33, "learning_rate": 4.8187282802667424e-05, "loss": 0.6917, "step": 1569, "task_loss": 0.6480473279953003 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3256946802139282, "epoch": 1.33, "learning_rate": 4.818258664412511e-05, "loss": 0.4723, "step": 1570, "task_loss": 0.7325423955917358 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5533435344696045, "epoch": 1.33, "learning_rate": 4.81778904855828e-05, "loss": 0.5658, "step": 1571, "task_loss": 0.5203610062599182 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.592958390712738, "epoch": 1.33, "learning_rate": 4.817319432704048e-05, "loss": 0.453, "step": 1572, "task_loss": 0.44023460149765015 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4569319486618042, "epoch": 1.33, "learning_rate": 4.816849816849817e-05, "loss": 0.5174, "step": 1573, "task_loss": 0.10886258631944656 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4337739944458008, "epoch": 1.33, "learning_rate": 4.816380200995586e-05, "loss": 0.4938, "step": 1574, "task_loss": 0.5731622576713562 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.43101778626441956, "epoch": 1.33, "learning_rate": 4.815910585141354e-05, "loss": 0.4946, "step": 1575, "task_loss": 1.2174791097640991 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3706350326538086, "epoch": 1.33, "learning_rate": 4.8154409692871235e-05, "loss": 0.3908, "step": 1576, "task_loss": 0.12853409349918365 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3911697268486023, "epoch": 1.33, "learning_rate": 4.814971353432892e-05, "loss": 0.4099, "step": 1577, "task_loss": 0.2425040453672409 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.0136144161224365, "epoch": 1.33, "learning_rate": 4.814501737578661e-05, "loss": 0.7654, "step": 1578, "task_loss": 0.28306055068969727 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5577731132507324, "epoch": 1.33, "learning_rate": 4.8140321217244294e-05, "loss": 0.7658, "step": 1579, "task_loss": 1.4907143115997314 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6440142393112183, "epoch": 1.34, "learning_rate": 4.813562505870198e-05, "loss": 0.6122, "step": 1580, "task_loss": 0.9424047470092773 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6828206181526184, "epoch": 1.34, "learning_rate": 4.8130928900159674e-05, "loss": 0.706, "step": 1581, "task_loss": 0.8642055988311768 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.49305492639541626, "epoch": 1.34, "learning_rate": 4.812623274161736e-05, "loss": 0.6297, "step": 1582, "task_loss": 1.0863714218139648 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2583116888999939, "epoch": 1.34, "learning_rate": 4.8121536583075046e-05, "loss": 0.4294, "step": 1583, "task_loss": 0.6684529185295105 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.7939484119415283, "epoch": 1.34, "learning_rate": 4.811684042453273e-05, "loss": 0.6723, "step": 1584, "task_loss": 1.183083415031433 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.0646734237670898, "epoch": 1.34, "learning_rate": 4.811214426599042e-05, "loss": 0.7159, "step": 1585, "task_loss": 0.5680029392242432 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5453145503997803, "epoch": 1.34, "learning_rate": 4.810744810744811e-05, "loss": 0.5912, "step": 1586, "task_loss": 0.18548765778541565 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6063659191131592, "epoch": 1.34, "learning_rate": 4.81027519489058e-05, "loss": 0.5761, "step": 1587, "task_loss": 0.41710150241851807 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.7054169178009033, "epoch": 1.34, "learning_rate": 4.8098055790363485e-05, "loss": 0.6263, "step": 1588, "task_loss": 0.5029739737510681 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3755860924720764, "epoch": 1.34, "learning_rate": 4.809335963182117e-05, "loss": 0.5577, "step": 1589, "task_loss": 0.47138115763664246 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5435585975646973, "epoch": 1.34, "learning_rate": 4.808866347327886e-05, "loss": 0.7374, "step": 1590, "task_loss": 0.7633550763130188 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.7981275916099548, "epoch": 1.34, "learning_rate": 4.808396731473655e-05, "loss": 0.5895, "step": 1591, "task_loss": 0.7119269371032715 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.48303720355033875, "epoch": 1.35, "learning_rate": 4.807927115619423e-05, "loss": 0.4605, "step": 1592, "task_loss": 1.0852748155593872 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.7031118273735046, "epoch": 1.35, "learning_rate": 4.8074574997651923e-05, "loss": 0.5205, "step": 1593, "task_loss": 1.1663708686828613 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4383600652217865, "epoch": 1.35, "learning_rate": 4.806987883910961e-05, "loss": 0.5212, "step": 1594, "task_loss": 0.9021636843681335 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.418383926153183, "epoch": 1.35, "learning_rate": 4.80651826805673e-05, "loss": 0.5409, "step": 1595, "task_loss": 0.13773605227470398 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3269578814506531, "epoch": 1.35, "learning_rate": 4.806048652202498e-05, "loss": 0.6929, "step": 1596, "task_loss": 1.1575239896774292 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.44816088676452637, "epoch": 1.35, "learning_rate": 4.8055790363482676e-05, "loss": 0.5433, "step": 1597, "task_loss": 0.7787641286849976 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4737761616706848, "epoch": 1.35, "learning_rate": 4.805109420494036e-05, "loss": 0.587, "step": 1598, "task_loss": 0.6614913940429688 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.970577597618103, "epoch": 1.35, "learning_rate": 4.804639804639805e-05, "loss": 0.6996, "step": 1599, "task_loss": 1.2826341390609741 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5127413272857666, "epoch": 1.35, "learning_rate": 4.804170188785574e-05, "loss": 0.6209, "step": 1600, "task_loss": 0.5460997819900513 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.36214709281921387, "epoch": 1.35, "learning_rate": 4.803700572931342e-05, "loss": 0.6312, "step": 1601, "task_loss": 1.021809697151184 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5148459672927856, "epoch": 1.35, "learning_rate": 4.8032309570771114e-05, "loss": 0.611, "step": 1602, "task_loss": 0.3887481987476349 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6700729727745056, "epoch": 1.35, "learning_rate": 4.80276134122288e-05, "loss": 0.5438, "step": 1603, "task_loss": 0.4404171407222748 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5128576159477234, "epoch": 1.36, "learning_rate": 4.802291725368649e-05, "loss": 0.6531, "step": 1604, "task_loss": 0.3466219902038574 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5506155490875244, "epoch": 1.36, "learning_rate": 4.801822109514417e-05, "loss": 0.6177, "step": 1605, "task_loss": 0.17074881494045258 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.846977949142456, "epoch": 1.36, "learning_rate": 4.801352493660186e-05, "loss": 0.601, "step": 1606, "task_loss": 0.7702576518058777 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.588234543800354, "epoch": 1.36, "learning_rate": 4.800882877805955e-05, "loss": 0.6555, "step": 1607, "task_loss": 0.9942833185195923 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6817035675048828, "epoch": 1.36, "learning_rate": 4.800413261951724e-05, "loss": 0.7148, "step": 1608, "task_loss": 0.9131969213485718 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.43112996220588684, "epoch": 1.36, "learning_rate": 4.7999436460974925e-05, "loss": 0.5319, "step": 1609, "task_loss": 0.9301946759223938 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.846962571144104, "epoch": 1.36, "learning_rate": 4.799474030243261e-05, "loss": 0.7697, "step": 1610, "task_loss": 1.4009406566619873 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.8207221627235413, "epoch": 1.36, "learning_rate": 4.79900441438903e-05, "loss": 0.8219, "step": 1611, "task_loss": 0.9331570863723755 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6331489086151123, "epoch": 1.36, "learning_rate": 4.798534798534799e-05, "loss": 0.662, "step": 1612, "task_loss": 0.7073463201522827 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4149402379989624, "epoch": 1.36, "learning_rate": 4.798065182680567e-05, "loss": 0.628, "step": 1613, "task_loss": 0.58563232421875 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6279155015945435, "epoch": 1.36, "learning_rate": 4.7975955668263364e-05, "loss": 0.6153, "step": 1614, "task_loss": 1.2558414936065674 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.40521812438964844, "epoch": 1.36, "learning_rate": 4.797125950972105e-05, "loss": 0.5038, "step": 1615, "task_loss": 0.4520301818847656 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4847283661365509, "epoch": 1.37, "learning_rate": 4.7966563351178736e-05, "loss": 0.5033, "step": 1616, "task_loss": 0.4998977482318878 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5778762102127075, "epoch": 1.37, "learning_rate": 4.796186719263643e-05, "loss": 0.5275, "step": 1617, "task_loss": 0.6062279343605042 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3704608082771301, "epoch": 1.37, "learning_rate": 4.795717103409411e-05, "loss": 0.5168, "step": 1618, "task_loss": 0.7343268990516663 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.45506542921066284, "epoch": 1.37, "learning_rate": 4.79524748755518e-05, "loss": 0.5677, "step": 1619, "task_loss": 0.35434967279434204 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.37402915954589844, "epoch": 1.37, "learning_rate": 4.794777871700949e-05, "loss": 0.5393, "step": 1620, "task_loss": 0.035643644630908966 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.0044082403182983, "epoch": 1.37, "learning_rate": 4.794308255846718e-05, "loss": 0.707, "step": 1621, "task_loss": 0.9272604584693909 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4245780110359192, "epoch": 1.37, "learning_rate": 4.793838639992486e-05, "loss": 0.5962, "step": 1622, "task_loss": 0.5920388102531433 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4326555132865906, "epoch": 1.37, "learning_rate": 4.793369024138255e-05, "loss": 0.4418, "step": 1623, "task_loss": 1.226893424987793 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6501697897911072, "epoch": 1.37, "learning_rate": 4.792899408284024e-05, "loss": 0.6174, "step": 1624, "task_loss": 1.0539603233337402 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6324215531349182, "epoch": 1.37, "learning_rate": 4.792429792429793e-05, "loss": 0.6366, "step": 1625, "task_loss": 0.9670824408531189 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.791627824306488, "epoch": 1.37, "learning_rate": 4.791960176575561e-05, "loss": 0.6822, "step": 1626, "task_loss": 0.6989907026290894 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6285141706466675, "epoch": 1.38, "learning_rate": 4.79149056072133e-05, "loss": 0.5556, "step": 1627, "task_loss": 0.9508346915245056 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.7085584402084351, "epoch": 1.38, "learning_rate": 4.791020944867099e-05, "loss": 0.5969, "step": 1628, "task_loss": 0.7011109590530396 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.7885511517524719, "epoch": 1.38, "learning_rate": 4.790551329012868e-05, "loss": 0.7065, "step": 1629, "task_loss": 0.668955385684967 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.7590125799179077, "epoch": 1.38, "learning_rate": 4.7900817131586365e-05, "loss": 0.6951, "step": 1630, "task_loss": 0.8794214725494385 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6715810298919678, "epoch": 1.38, "learning_rate": 4.789612097304405e-05, "loss": 0.5579, "step": 1631, "task_loss": 1.2304558753967285 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5521953105926514, "epoch": 1.38, "learning_rate": 4.789142481450174e-05, "loss": 0.6949, "step": 1632, "task_loss": 1.133043646812439 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2727198600769043, "epoch": 1.38, "learning_rate": 4.788672865595943e-05, "loss": 0.4784, "step": 1633, "task_loss": 0.20749114453792572 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6448957324028015, "epoch": 1.38, "learning_rate": 4.788203249741712e-05, "loss": 0.8084, "step": 1634, "task_loss": 1.107852816581726 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.0230878591537476, "epoch": 1.38, "learning_rate": 4.7877336338874804e-05, "loss": 0.7094, "step": 1635, "task_loss": 0.8893623352050781 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6867352724075317, "epoch": 1.38, "learning_rate": 4.787264018033249e-05, "loss": 0.6499, "step": 1636, "task_loss": 1.5206507444381714 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5930062532424927, "epoch": 1.38, "learning_rate": 4.7867944021790177e-05, "loss": 0.6019, "step": 1637, "task_loss": 0.4308053255081177 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.9250055551528931, "epoch": 1.38, "learning_rate": 4.786324786324787e-05, "loss": 0.8052, "step": 1638, "task_loss": 1.394662857055664 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6298177242279053, "epoch": 1.39, "learning_rate": 4.785855170470555e-05, "loss": 0.6658, "step": 1639, "task_loss": 0.5260576605796814 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.625712513923645, "epoch": 1.39, "learning_rate": 4.785385554616324e-05, "loss": 0.6869, "step": 1640, "task_loss": 1.094031572341919 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3023645281791687, "epoch": 1.39, "learning_rate": 4.784915938762093e-05, "loss": 0.5112, "step": 1641, "task_loss": 0.6468602418899536 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2255578339099884, "epoch": 1.39, "learning_rate": 4.7844463229078615e-05, "loss": 0.5208, "step": 1642, "task_loss": 0.02285122126340866 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5913540720939636, "epoch": 1.39, "learning_rate": 4.78397670705363e-05, "loss": 0.6996, "step": 1643, "task_loss": 0.5406564474105835 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.9128932952880859, "epoch": 1.39, "learning_rate": 4.783507091199399e-05, "loss": 0.5013, "step": 1644, "task_loss": 0.7289896011352539 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.8327547311782837, "epoch": 1.39, "learning_rate": 4.783037475345168e-05, "loss": 0.6093, "step": 1645, "task_loss": 0.783568263053894 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3405892550945282, "epoch": 1.39, "learning_rate": 4.782567859490937e-05, "loss": 0.5788, "step": 1646, "task_loss": 0.27206966280937195 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6549792885780334, "epoch": 1.39, "learning_rate": 4.7820982436367054e-05, "loss": 0.566, "step": 1647, "task_loss": 1.1901692152023315 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6471213102340698, "epoch": 1.39, "learning_rate": 4.781628627782474e-05, "loss": 0.6159, "step": 1648, "task_loss": 0.6256771683692932 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6229049563407898, "epoch": 1.39, "learning_rate": 4.7811590119282426e-05, "loss": 0.6869, "step": 1649, "task_loss": 1.2939614057540894 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4963051974773407, "epoch": 1.39, "learning_rate": 4.780689396074012e-05, "loss": 0.6677, "step": 1650, "task_loss": 0.6176434755325317 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5941067934036255, "epoch": 1.4, "learning_rate": 4.7802197802197806e-05, "loss": 0.6619, "step": 1651, "task_loss": 0.6285609602928162 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4486173987388611, "epoch": 1.4, "learning_rate": 4.779750164365549e-05, "loss": 0.5563, "step": 1652, "task_loss": 0.2463979572057724 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3717479407787323, "epoch": 1.4, "learning_rate": 4.779280548511318e-05, "loss": 0.5078, "step": 1653, "task_loss": 0.27591925859451294 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.582754373550415, "epoch": 1.4, "learning_rate": 4.7788109326570865e-05, "loss": 0.6038, "step": 1654, "task_loss": 1.1268489360809326 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3973112106323242, "epoch": 1.4, "learning_rate": 4.778341316802856e-05, "loss": 0.5645, "step": 1655, "task_loss": 0.13454020023345947 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6616090536117554, "epoch": 1.4, "learning_rate": 4.777871700948624e-05, "loss": 0.6783, "step": 1656, "task_loss": 0.3349473476409912 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.42962509393692017, "epoch": 1.4, "learning_rate": 4.777402085094393e-05, "loss": 0.6321, "step": 1657, "task_loss": 1.292642593383789 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3535487949848175, "epoch": 1.4, "learning_rate": 4.776932469240162e-05, "loss": 0.5843, "step": 1658, "task_loss": 0.11202388256788254 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6484713554382324, "epoch": 1.4, "learning_rate": 4.776462853385931e-05, "loss": 0.4678, "step": 1659, "task_loss": 1.1821577548980713 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.44544318318367004, "epoch": 1.4, "learning_rate": 4.775993237531699e-05, "loss": 0.5599, "step": 1660, "task_loss": 0.16938871145248413 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.7618705034255981, "epoch": 1.4, "learning_rate": 4.7755236216774676e-05, "loss": 0.7092, "step": 1661, "task_loss": 0.6327065229415894 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.45954591035842896, "epoch": 1.4, "learning_rate": 4.775054005823237e-05, "loss": 0.4335, "step": 1662, "task_loss": 0.337223619222641 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6329273581504822, "epoch": 1.41, "learning_rate": 4.7745843899690055e-05, "loss": 0.5844, "step": 1663, "task_loss": 0.7710459232330322 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.49368688464164734, "epoch": 1.41, "learning_rate": 4.774114774114775e-05, "loss": 0.6015, "step": 1664, "task_loss": 0.31707435846328735 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4891490638256073, "epoch": 1.41, "learning_rate": 4.773645158260543e-05, "loss": 0.5338, "step": 1665, "task_loss": 0.6553794145584106 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.0766609907150269, "epoch": 1.41, "learning_rate": 4.773175542406312e-05, "loss": 0.7095, "step": 1666, "task_loss": 1.0282387733459473 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5905341506004333, "epoch": 1.41, "learning_rate": 4.772705926552081e-05, "loss": 0.732, "step": 1667, "task_loss": 1.3435232639312744 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.7805784940719604, "epoch": 1.41, "learning_rate": 4.7722363106978494e-05, "loss": 0.5006, "step": 1668, "task_loss": 0.588904857635498 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2207416146993637, "epoch": 1.41, "learning_rate": 4.771766694843618e-05, "loss": 0.5146, "step": 1669, "task_loss": 0.9002301096916199 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4912632703781128, "epoch": 1.41, "learning_rate": 4.7712970789893866e-05, "loss": 0.5455, "step": 1670, "task_loss": 0.4370778799057007 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.341635525226593, "epoch": 1.41, "learning_rate": 4.770827463135156e-05, "loss": 0.5415, "step": 1671, "task_loss": 0.6288416385650635 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5023935437202454, "epoch": 1.41, "learning_rate": 4.7703578472809246e-05, "loss": 0.6909, "step": 1672, "task_loss": 0.48191872239112854 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4665360748767853, "epoch": 1.41, "learning_rate": 4.769888231426693e-05, "loss": 0.7911, "step": 1673, "task_loss": 0.6535815000534058 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5330772995948792, "epoch": 1.41, "learning_rate": 4.769418615572462e-05, "loss": 0.5724, "step": 1674, "task_loss": 0.3654189705848694 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5425856113433838, "epoch": 1.42, "learning_rate": 4.7689489997182305e-05, "loss": 0.4697, "step": 1675, "task_loss": 0.39765316247940063 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.46999087929725647, "epoch": 1.42, "learning_rate": 4.768479383864e-05, "loss": 0.4343, "step": 1676, "task_loss": 0.8304810523986816 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.9027906656265259, "epoch": 1.42, "learning_rate": 4.7680097680097684e-05, "loss": 0.62, "step": 1677, "task_loss": 0.5658296346664429 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.71793532371521, "epoch": 1.42, "learning_rate": 4.767540152155537e-05, "loss": 0.7569, "step": 1678, "task_loss": 0.675432562828064 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5552042722702026, "epoch": 1.42, "learning_rate": 4.767070536301306e-05, "loss": 0.5337, "step": 1679, "task_loss": 0.9523319005966187 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.44727009534835815, "epoch": 1.42, "learning_rate": 4.7666009204470743e-05, "loss": 0.5384, "step": 1680, "task_loss": 1.0604907274246216 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6641278862953186, "epoch": 1.42, "learning_rate": 4.7661313045928437e-05, "loss": 0.5178, "step": 1681, "task_loss": 1.0119487047195435 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.39726608991622925, "epoch": 1.42, "learning_rate": 4.7656616887386116e-05, "loss": 0.4341, "step": 1682, "task_loss": 0.20627453923225403 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5769674777984619, "epoch": 1.42, "learning_rate": 4.765192072884381e-05, "loss": 0.4123, "step": 1683, "task_loss": 0.10757043957710266 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4141567349433899, "epoch": 1.42, "learning_rate": 4.7647224570301496e-05, "loss": 0.5555, "step": 1684, "task_loss": 0.9611308574676514 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6603943705558777, "epoch": 1.42, "learning_rate": 4.764252841175918e-05, "loss": 0.7677, "step": 1685, "task_loss": 0.34366485476493835 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6432027816772461, "epoch": 1.42, "learning_rate": 4.763783225321687e-05, "loss": 0.5967, "step": 1686, "task_loss": 0.7625919580459595 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3677903115749359, "epoch": 1.43, "learning_rate": 4.7633136094674555e-05, "loss": 0.5526, "step": 1687, "task_loss": 0.5651863217353821 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.8282896280288696, "epoch": 1.43, "learning_rate": 4.762843993613225e-05, "loss": 0.733, "step": 1688, "task_loss": 0.7544581294059753 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3845669627189636, "epoch": 1.43, "learning_rate": 4.7623743777589934e-05, "loss": 0.5428, "step": 1689, "task_loss": 0.419605016708374 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4004131555557251, "epoch": 1.43, "learning_rate": 4.761904761904762e-05, "loss": 0.6542, "step": 1690, "task_loss": 0.28962236642837524 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.37988877296447754, "epoch": 1.43, "learning_rate": 4.761435146050531e-05, "loss": 0.5331, "step": 1691, "task_loss": 1.2778712511062622 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.9036530256271362, "epoch": 1.43, "learning_rate": 4.7609655301963e-05, "loss": 0.4934, "step": 1692, "task_loss": 0.9585182070732117 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3795052170753479, "epoch": 1.43, "learning_rate": 4.7604959143420686e-05, "loss": 0.5756, "step": 1693, "task_loss": 0.060575101524591446 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3176575303077698, "epoch": 1.43, "learning_rate": 4.760026298487837e-05, "loss": 0.5541, "step": 1694, "task_loss": 0.04321736469864845 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4549373984336853, "epoch": 1.43, "learning_rate": 4.759556682633606e-05, "loss": 0.5903, "step": 1695, "task_loss": 0.9013234376907349 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5707936882972717, "epoch": 1.43, "learning_rate": 4.7590870667793745e-05, "loss": 0.5708, "step": 1696, "task_loss": 1.176888346672058 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.7325726747512817, "epoch": 1.43, "learning_rate": 4.758617450925144e-05, "loss": 0.7259, "step": 1697, "task_loss": 1.5041282176971436 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4337136447429657, "epoch": 1.44, "learning_rate": 4.7581478350709125e-05, "loss": 0.6171, "step": 1698, "task_loss": 0.9632522463798523 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.8745871782302856, "epoch": 1.44, "learning_rate": 4.757678219216681e-05, "loss": 0.7305, "step": 1699, "task_loss": 2.1209444999694824 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.42600828409194946, "epoch": 1.44, "learning_rate": 4.75720860336245e-05, "loss": 0.8298, "step": 1700, "task_loss": 0.7192578315734863 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.38154494762420654, "epoch": 1.44, "learning_rate": 4.7567389875082184e-05, "loss": 0.6096, "step": 1701, "task_loss": 0.9794694781303406 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3135108947753906, "epoch": 1.44, "learning_rate": 4.756269371653988e-05, "loss": 0.4503, "step": 1702, "task_loss": 0.6690059900283813 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.45923948287963867, "epoch": 1.44, "learning_rate": 4.7557997557997556e-05, "loss": 0.5694, "step": 1703, "task_loss": 0.9617040157318115 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4590805172920227, "epoch": 1.44, "learning_rate": 4.755330139945525e-05, "loss": 0.5601, "step": 1704, "task_loss": 0.3061573803424835 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4680730104446411, "epoch": 1.44, "learning_rate": 4.7548605240912936e-05, "loss": 0.7121, "step": 1705, "task_loss": 0.8845243453979492 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3977121114730835, "epoch": 1.44, "learning_rate": 4.754390908237062e-05, "loss": 0.5876, "step": 1706, "task_loss": 1.1434568166732788 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4672061800956726, "epoch": 1.44, "learning_rate": 4.7539212923828315e-05, "loss": 0.5551, "step": 1707, "task_loss": 0.5394863486289978 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3127460181713104, "epoch": 1.44, "learning_rate": 4.7534516765285995e-05, "loss": 0.4342, "step": 1708, "task_loss": 0.1933390200138092 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.2080340385437012, "epoch": 1.44, "learning_rate": 4.752982060674369e-05, "loss": 0.6746, "step": 1709, "task_loss": 0.8523832559585571 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.2012395858764648, "epoch": 1.45, "learning_rate": 4.7525124448201374e-05, "loss": 0.7143, "step": 1710, "task_loss": 0.8809433579444885 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.685310959815979, "epoch": 1.45, "learning_rate": 4.752042828965906e-05, "loss": 0.7435, "step": 1711, "task_loss": 1.413419485092163 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5993162989616394, "epoch": 1.45, "learning_rate": 4.751573213111675e-05, "loss": 0.5446, "step": 1712, "task_loss": 0.9045497179031372 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3171706199645996, "epoch": 1.45, "learning_rate": 4.751103597257443e-05, "loss": 0.5578, "step": 1713, "task_loss": 0.8205767273902893 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5081040859222412, "epoch": 1.45, "learning_rate": 4.7506339814032126e-05, "loss": 0.5731, "step": 1714, "task_loss": 1.8179906606674194 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5296933054924011, "epoch": 1.45, "learning_rate": 4.750164365548981e-05, "loss": 0.5222, "step": 1715, "task_loss": 0.4145263731479645 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5474762916564941, "epoch": 1.45, "learning_rate": 4.74969474969475e-05, "loss": 0.601, "step": 1716, "task_loss": 0.4050251543521881 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5535275340080261, "epoch": 1.45, "learning_rate": 4.7492251338405185e-05, "loss": 0.6063, "step": 1717, "task_loss": 0.8784699440002441 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5403037667274475, "epoch": 1.45, "learning_rate": 4.748755517986287e-05, "loss": 0.5823, "step": 1718, "task_loss": 1.4323643445968628 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6731171607971191, "epoch": 1.45, "learning_rate": 4.7482859021320565e-05, "loss": 0.6806, "step": 1719, "task_loss": 0.6315076947212219 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6189766526222229, "epoch": 1.45, "learning_rate": 4.7478162862778244e-05, "loss": 0.5909, "step": 1720, "task_loss": 1.31588613986969 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.7352336645126343, "epoch": 1.45, "learning_rate": 4.747346670423594e-05, "loss": 0.4898, "step": 1721, "task_loss": 1.1063120365142822 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5604705214500427, "epoch": 1.46, "learning_rate": 4.7468770545693624e-05, "loss": 0.6487, "step": 1722, "task_loss": 0.7263510227203369 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.434240460395813, "epoch": 1.46, "learning_rate": 4.746407438715132e-05, "loss": 0.584, "step": 1723, "task_loss": 0.5699338912963867 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5256175994873047, "epoch": 1.46, "learning_rate": 4.7459378228609e-05, "loss": 0.6261, "step": 1724, "task_loss": 0.641033411026001 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6866698265075684, "epoch": 1.46, "learning_rate": 4.745468207006668e-05, "loss": 0.7851, "step": 1725, "task_loss": 1.4354498386383057 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5095258951187134, "epoch": 1.46, "learning_rate": 4.7449985911524376e-05, "loss": 0.5491, "step": 1726, "task_loss": 0.7234744429588318 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4450724720954895, "epoch": 1.46, "learning_rate": 4.744528975298206e-05, "loss": 0.5529, "step": 1727, "task_loss": 0.2879394292831421 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.353641152381897, "epoch": 1.46, "learning_rate": 4.7440593594439755e-05, "loss": 0.4256, "step": 1728, "task_loss": 0.29519811272621155 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.360676646232605, "epoch": 1.46, "learning_rate": 4.7435897435897435e-05, "loss": 0.5496, "step": 1729, "task_loss": 0.11224878579378128 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.8293690085411072, "epoch": 1.46, "learning_rate": 4.743120127735513e-05, "loss": 0.5148, "step": 1730, "task_loss": 0.5572472810745239 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5536460280418396, "epoch": 1.46, "learning_rate": 4.7426505118812814e-05, "loss": 0.5264, "step": 1731, "task_loss": 0.5107840299606323 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.7914451360702515, "epoch": 1.46, "learning_rate": 4.74218089602705e-05, "loss": 0.6965, "step": 1732, "task_loss": 0.5417155623435974 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4786778688430786, "epoch": 1.46, "learning_rate": 4.741711280172819e-05, "loss": 0.7089, "step": 1733, "task_loss": 0.8589772582054138 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5884360074996948, "epoch": 1.47, "learning_rate": 4.7412416643185874e-05, "loss": 0.5197, "step": 1734, "task_loss": 1.531866431236267 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.47132694721221924, "epoch": 1.47, "learning_rate": 4.740772048464357e-05, "loss": 0.4069, "step": 1735, "task_loss": 0.3693479001522064 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5266053676605225, "epoch": 1.47, "learning_rate": 4.740302432610125e-05, "loss": 0.4947, "step": 1736, "task_loss": 0.7525853514671326 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.9121435880661011, "epoch": 1.47, "learning_rate": 4.739832816755894e-05, "loss": 0.5628, "step": 1737, "task_loss": 0.5331129431724548 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5926668643951416, "epoch": 1.47, "learning_rate": 4.7393632009016626e-05, "loss": 0.5182, "step": 1738, "task_loss": 0.9783690571784973 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.422587513923645, "epoch": 1.47, "learning_rate": 4.738893585047431e-05, "loss": 0.5913, "step": 1739, "task_loss": 0.43464240431785583 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4938250482082367, "epoch": 1.47, "learning_rate": 4.7384239691932005e-05, "loss": 0.6059, "step": 1740, "task_loss": 0.38438811898231506 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.35957518219947815, "epoch": 1.47, "learning_rate": 4.737954353338969e-05, "loss": 0.4843, "step": 1741, "task_loss": 1.6396713256835938 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5178916454315186, "epoch": 1.47, "learning_rate": 4.737484737484738e-05, "loss": 0.5548, "step": 1742, "task_loss": 1.6556224822998047 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6585602164268494, "epoch": 1.47, "learning_rate": 4.7370151216305064e-05, "loss": 0.563, "step": 1743, "task_loss": 0.8948805332183838 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.8171379566192627, "epoch": 1.47, "learning_rate": 4.736545505776275e-05, "loss": 0.6025, "step": 1744, "task_loss": 0.8966345191001892 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5017954111099243, "epoch": 1.47, "learning_rate": 4.7360758899220444e-05, "loss": 0.6999, "step": 1745, "task_loss": 0.21828439831733704 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5911415815353394, "epoch": 1.48, "learning_rate": 4.735606274067812e-05, "loss": 0.5837, "step": 1746, "task_loss": 1.5282588005065918 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5401361584663391, "epoch": 1.48, "learning_rate": 4.7351366582135816e-05, "loss": 0.5384, "step": 1747, "task_loss": 0.28108352422714233 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6684252023696899, "epoch": 1.48, "learning_rate": 4.73466704235935e-05, "loss": 0.672, "step": 1748, "task_loss": 1.193454623222351 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4978240728378296, "epoch": 1.48, "learning_rate": 4.734197426505119e-05, "loss": 0.5566, "step": 1749, "task_loss": 0.9841524958610535 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4638376832008362, "epoch": 1.48, "learning_rate": 4.7337278106508875e-05, "loss": 0.5078, "step": 1750, "task_loss": 0.8538601994514465 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5723704695701599, "epoch": 1.48, "learning_rate": 4.733258194796656e-05, "loss": 0.5516, "step": 1751, "task_loss": 1.0360629558563232 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.32406219840049744, "epoch": 1.48, "learning_rate": 4.7327885789424255e-05, "loss": 0.5789, "step": 1752, "task_loss": 0.2305552214384079 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3213307857513428, "epoch": 1.48, "learning_rate": 4.732318963088194e-05, "loss": 0.4538, "step": 1753, "task_loss": 0.4560941457748413 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5032540559768677, "epoch": 1.48, "learning_rate": 4.7318493472339634e-05, "loss": 0.5827, "step": 1754, "task_loss": 0.6187043786048889 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5370607972145081, "epoch": 1.48, "learning_rate": 4.7313797313797314e-05, "loss": 0.6107, "step": 1755, "task_loss": 0.6384979486465454 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5159317851066589, "epoch": 1.48, "learning_rate": 4.7309101155255e-05, "loss": 0.4999, "step": 1756, "task_loss": 0.587050199508667 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.7169076800346375, "epoch": 1.48, "learning_rate": 4.730440499671269e-05, "loss": 0.7053, "step": 1757, "task_loss": 1.3134206533432007 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.7701461315155029, "epoch": 1.49, "learning_rate": 4.729970883817038e-05, "loss": 0.7, "step": 1758, "task_loss": 1.1118028163909912 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4240128993988037, "epoch": 1.49, "learning_rate": 4.7295012679628066e-05, "loss": 0.608, "step": 1759, "task_loss": 0.4279307425022125 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.48941919207572937, "epoch": 1.49, "learning_rate": 4.729031652108575e-05, "loss": 0.7093, "step": 1760, "task_loss": 2.10709810256958 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6665080785751343, "epoch": 1.49, "learning_rate": 4.7285620362543445e-05, "loss": 0.6386, "step": 1761, "task_loss": 1.0828911066055298 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.7394925355911255, "epoch": 1.49, "learning_rate": 4.728092420400113e-05, "loss": 0.7404, "step": 1762, "task_loss": 0.8737367987632751 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4843217730522156, "epoch": 1.49, "learning_rate": 4.727622804545882e-05, "loss": 0.5516, "step": 1763, "task_loss": 0.2954407036304474 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3920549154281616, "epoch": 1.49, "learning_rate": 4.7271531886916504e-05, "loss": 0.5014, "step": 1764, "task_loss": 0.11834452301263809 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2948736548423767, "epoch": 1.49, "learning_rate": 4.726683572837419e-05, "loss": 0.5344, "step": 1765, "task_loss": 0.13339585065841675 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.43370670080184937, "epoch": 1.49, "learning_rate": 4.7262139569831884e-05, "loss": 0.5657, "step": 1766, "task_loss": 0.42753562331199646 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.9254080057144165, "epoch": 1.49, "learning_rate": 4.725744341128956e-05, "loss": 0.6812, "step": 1767, "task_loss": 0.9892339706420898 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.39960789680480957, "epoch": 1.49, "learning_rate": 4.7252747252747257e-05, "loss": 0.5428, "step": 1768, "task_loss": 0.6254394054412842 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.0269381999969482, "epoch": 1.5, "learning_rate": 4.724805109420494e-05, "loss": 0.618, "step": 1769, "task_loss": 0.8685317635536194 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4588848650455475, "epoch": 1.5, "learning_rate": 4.724335493566263e-05, "loss": 0.6104, "step": 1770, "task_loss": 1.3435319662094116 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.446067750453949, "epoch": 1.5, "learning_rate": 4.723865877712032e-05, "loss": 0.542, "step": 1771, "task_loss": 0.4482569098472595 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.47300654649734497, "epoch": 1.5, "learning_rate": 4.7233962618578e-05, "loss": 0.6161, "step": 1772, "task_loss": 1.1608705520629883 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.7786028385162354, "epoch": 1.5, "learning_rate": 4.7229266460035695e-05, "loss": 0.6448, "step": 1773, "task_loss": 0.9818819761276245 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.8129267692565918, "epoch": 1.5, "learning_rate": 4.722457030149338e-05, "loss": 0.5489, "step": 1774, "task_loss": 1.4871063232421875 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5099161863327026, "epoch": 1.5, "learning_rate": 4.721987414295107e-05, "loss": 0.4436, "step": 1775, "task_loss": 0.28026899695396423 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.8355551362037659, "epoch": 1.5, "learning_rate": 4.7215177984408754e-05, "loss": 0.6905, "step": 1776, "task_loss": 1.0886460542678833 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5663450956344604, "epoch": 1.5, "learning_rate": 4.721048182586644e-05, "loss": 0.5125, "step": 1777, "task_loss": 0.7053067684173584 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6802136898040771, "epoch": 1.5, "learning_rate": 4.7205785667324133e-05, "loss": 0.4994, "step": 1778, "task_loss": 0.5682879686355591 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4849269688129425, "epoch": 1.5, "learning_rate": 4.720108950878182e-05, "loss": 0.4508, "step": 1779, "task_loss": 0.18343780934810638 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.30709266662597656, "epoch": 1.5, "learning_rate": 4.7196393350239506e-05, "loss": 0.5454, "step": 1780, "task_loss": 1.104981780052185 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.8152207732200623, "epoch": 1.51, "learning_rate": 4.719169719169719e-05, "loss": 0.7672, "step": 1781, "task_loss": 0.9177175760269165 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4609120488166809, "epoch": 1.51, "learning_rate": 4.718700103315488e-05, "loss": 0.5844, "step": 1782, "task_loss": 0.49280959367752075 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4774126410484314, "epoch": 1.51, "learning_rate": 4.718230487461257e-05, "loss": 0.5882, "step": 1783, "task_loss": 0.9920885562896729 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4701034426689148, "epoch": 1.51, "learning_rate": 4.717760871607026e-05, "loss": 0.5893, "step": 1784, "task_loss": 1.4934524297714233 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6782662868499756, "epoch": 1.51, "learning_rate": 4.7172912557527945e-05, "loss": 0.7713, "step": 1785, "task_loss": 0.595090925693512 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6420391798019409, "epoch": 1.51, "learning_rate": 4.716821639898563e-05, "loss": 0.5801, "step": 1786, "task_loss": 0.9405713677406311 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.45343804359436035, "epoch": 1.51, "learning_rate": 4.7163520240443324e-05, "loss": 0.58, "step": 1787, "task_loss": 0.3453715443611145 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.8332772254943848, "epoch": 1.51, "learning_rate": 4.715882408190101e-05, "loss": 0.541, "step": 1788, "task_loss": 1.5292502641677856 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.555126965045929, "epoch": 1.51, "learning_rate": 4.715412792335869e-05, "loss": 0.649, "step": 1789, "task_loss": 1.7272356748580933 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.48382294178009033, "epoch": 1.51, "learning_rate": 4.714943176481638e-05, "loss": 0.4535, "step": 1790, "task_loss": 0.5396559834480286 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.48102712631225586, "epoch": 1.51, "learning_rate": 4.714473560627407e-05, "loss": 0.5827, "step": 1791, "task_loss": 0.22900213301181793 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.739804208278656, "epoch": 1.51, "learning_rate": 4.714003944773176e-05, "loss": 0.7315, "step": 1792, "task_loss": 1.0642415285110474 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5727558135986328, "epoch": 1.52, "learning_rate": 4.713534328918944e-05, "loss": 0.6187, "step": 1793, "task_loss": 0.14176565408706665 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.31527066230773926, "epoch": 1.52, "learning_rate": 4.7130647130647135e-05, "loss": 0.3815, "step": 1794, "task_loss": 0.413805216550827 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.8277374505996704, "epoch": 1.52, "learning_rate": 4.712595097210482e-05, "loss": 0.7509, "step": 1795, "task_loss": 0.792826771736145 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.33134451508522034, "epoch": 1.52, "learning_rate": 4.712125481356251e-05, "loss": 0.4612, "step": 1796, "task_loss": 0.24399790167808533 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3478565216064453, "epoch": 1.52, "learning_rate": 4.7116558655020194e-05, "loss": 0.6013, "step": 1797, "task_loss": 0.6398569941520691 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5370184183120728, "epoch": 1.52, "learning_rate": 4.711186249647788e-05, "loss": 0.6777, "step": 1798, "task_loss": 1.4945076704025269 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6733025312423706, "epoch": 1.52, "learning_rate": 4.7107166337935574e-05, "loss": 0.6265, "step": 1799, "task_loss": 1.7343300580978394 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6706859469413757, "epoch": 1.52, "learning_rate": 4.710247017939326e-05, "loss": 0.4879, "step": 1800, "task_loss": 0.7121043801307678 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6839126944541931, "epoch": 1.52, "learning_rate": 4.7097774020850946e-05, "loss": 0.5784, "step": 1801, "task_loss": 0.39130473136901855 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.7422491312026978, "epoch": 1.52, "learning_rate": 4.709307786230863e-05, "loss": 0.5647, "step": 1802, "task_loss": 0.44851943850517273 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5224934816360474, "epoch": 1.52, "learning_rate": 4.708838170376632e-05, "loss": 0.6259, "step": 1803, "task_loss": 1.4766483306884766 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.521412193775177, "epoch": 1.52, "learning_rate": 4.708368554522401e-05, "loss": 0.5853, "step": 1804, "task_loss": 0.2764165699481964 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.816644549369812, "epoch": 1.53, "learning_rate": 4.70789893866817e-05, "loss": 0.6368, "step": 1805, "task_loss": 1.8837902545928955 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5346130728721619, "epoch": 1.53, "learning_rate": 4.7074293228139385e-05, "loss": 0.5366, "step": 1806, "task_loss": 1.0963741540908813 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3711344599723816, "epoch": 1.53, "learning_rate": 4.706959706959707e-05, "loss": 0.3806, "step": 1807, "task_loss": 0.7327045202255249 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.7563214302062988, "epoch": 1.53, "learning_rate": 4.706490091105476e-05, "loss": 0.5583, "step": 1808, "task_loss": 0.6566154956817627 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5014467239379883, "epoch": 1.53, "learning_rate": 4.706020475251245e-05, "loss": 0.529, "step": 1809, "task_loss": 1.7174571752548218 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5377577543258667, "epoch": 1.53, "learning_rate": 4.705550859397013e-05, "loss": 0.5551, "step": 1810, "task_loss": 0.3616334795951843 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5012502074241638, "epoch": 1.53, "learning_rate": 4.705081243542782e-05, "loss": 0.6894, "step": 1811, "task_loss": 0.5955134034156799 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.7653435468673706, "epoch": 1.53, "learning_rate": 4.704611627688551e-05, "loss": 0.6885, "step": 1812, "task_loss": 1.3360198736190796 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5278595089912415, "epoch": 1.53, "learning_rate": 4.7041420118343196e-05, "loss": 0.6421, "step": 1813, "task_loss": 0.40481236577033997 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5398977994918823, "epoch": 1.53, "learning_rate": 4.703672395980088e-05, "loss": 0.7748, "step": 1814, "task_loss": 0.6062988638877869 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5858588218688965, "epoch": 1.53, "learning_rate": 4.703202780125857e-05, "loss": 0.5981, "step": 1815, "task_loss": 1.0421767234802246 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4117792248725891, "epoch": 1.53, "learning_rate": 4.702733164271626e-05, "loss": 0.5286, "step": 1816, "task_loss": 1.0585367679595947 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5052696466445923, "epoch": 1.54, "learning_rate": 4.702263548417395e-05, "loss": 0.6296, "step": 1817, "task_loss": 0.6016008853912354 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6066255569458008, "epoch": 1.54, "learning_rate": 4.701793932563164e-05, "loss": 0.5051, "step": 1818, "task_loss": 0.48053833842277527 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4595336616039276, "epoch": 1.54, "learning_rate": 4.701324316708932e-05, "loss": 0.5761, "step": 1819, "task_loss": 0.9621044397354126 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5598829388618469, "epoch": 1.54, "learning_rate": 4.700854700854701e-05, "loss": 0.666, "step": 1820, "task_loss": 0.7623969912528992 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.7515313625335693, "epoch": 1.54, "learning_rate": 4.70038508500047e-05, "loss": 0.5915, "step": 1821, "task_loss": 1.030352234840393 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6997864246368408, "epoch": 1.54, "learning_rate": 4.6999154691462387e-05, "loss": 0.644, "step": 1822, "task_loss": 0.4258491098880768 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4800461530685425, "epoch": 1.54, "learning_rate": 4.699445853292007e-05, "loss": 0.6049, "step": 1823, "task_loss": 0.7813599109649658 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3815319538116455, "epoch": 1.54, "learning_rate": 4.698976237437776e-05, "loss": 0.6337, "step": 1824, "task_loss": 0.5089420080184937 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5270010232925415, "epoch": 1.54, "learning_rate": 4.698506621583545e-05, "loss": 0.4836, "step": 1825, "task_loss": 0.5693044662475586 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5194756388664246, "epoch": 1.54, "learning_rate": 4.698037005729314e-05, "loss": 0.48, "step": 1826, "task_loss": 1.119638442993164 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.24129916727542877, "epoch": 1.54, "learning_rate": 4.697567389875082e-05, "loss": 0.3878, "step": 1827, "task_loss": 0.11435811221599579 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4901048243045807, "epoch": 1.54, "learning_rate": 4.697097774020851e-05, "loss": 0.5264, "step": 1828, "task_loss": 1.39665949344635 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2881966829299927, "epoch": 1.55, "learning_rate": 4.69662815816662e-05, "loss": 0.4473, "step": 1829, "task_loss": 0.7257423400878906 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5609315633773804, "epoch": 1.55, "learning_rate": 4.696158542312389e-05, "loss": 0.5217, "step": 1830, "task_loss": 1.0994693040847778 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.38073375821113586, "epoch": 1.55, "learning_rate": 4.695688926458158e-05, "loss": 0.6504, "step": 1831, "task_loss": 0.5290494561195374 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5119773149490356, "epoch": 1.55, "learning_rate": 4.6952193106039264e-05, "loss": 0.6186, "step": 1832, "task_loss": 1.0422250032424927 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.38902145624160767, "epoch": 1.55, "learning_rate": 4.694749694749695e-05, "loss": 0.4482, "step": 1833, "task_loss": 0.7616206407546997 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3862183690071106, "epoch": 1.55, "learning_rate": 4.6942800788954636e-05, "loss": 0.4838, "step": 1834, "task_loss": 1.4551347494125366 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.7636275291442871, "epoch": 1.55, "learning_rate": 4.693810463041233e-05, "loss": 0.5794, "step": 1835, "task_loss": 0.5440858602523804 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.1165210008621216, "epoch": 1.55, "learning_rate": 4.693340847187001e-05, "loss": 0.6582, "step": 1836, "task_loss": 1.4407297372817993 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.519279956817627, "epoch": 1.55, "learning_rate": 4.69287123133277e-05, "loss": 0.6449, "step": 1837, "task_loss": 0.5835684537887573 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4269437789916992, "epoch": 1.55, "learning_rate": 4.692401615478539e-05, "loss": 0.4845, "step": 1838, "task_loss": 0.7465857267379761 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4857889711856842, "epoch": 1.55, "learning_rate": 4.6919319996243075e-05, "loss": 0.6668, "step": 1839, "task_loss": 0.5877649784088135 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3874586522579193, "epoch": 1.56, "learning_rate": 4.691462383770076e-05, "loss": 0.5063, "step": 1840, "task_loss": 0.9883366227149963 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.8268128633499146, "epoch": 1.56, "learning_rate": 4.690992767915845e-05, "loss": 0.605, "step": 1841, "task_loss": 0.9920138716697693 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.38981783390045166, "epoch": 1.56, "learning_rate": 4.690523152061614e-05, "loss": 0.6329, "step": 1842, "task_loss": 1.0755709409713745 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5933724641799927, "epoch": 1.56, "learning_rate": 4.690053536207383e-05, "loss": 0.6515, "step": 1843, "task_loss": 0.40639886260032654 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6007105112075806, "epoch": 1.56, "learning_rate": 4.689583920353151e-05, "loss": 0.5033, "step": 1844, "task_loss": 0.22573482990264893 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5393925905227661, "epoch": 1.56, "learning_rate": 4.68911430449892e-05, "loss": 0.5745, "step": 1845, "task_loss": 0.6461055874824524 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.43580031394958496, "epoch": 1.56, "learning_rate": 4.6886446886446886e-05, "loss": 0.5406, "step": 1846, "task_loss": 0.7986985445022583 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5687268972396851, "epoch": 1.56, "learning_rate": 4.688175072790458e-05, "loss": 0.4739, "step": 1847, "task_loss": 0.9790605902671814 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.8112084269523621, "epoch": 1.56, "learning_rate": 4.6877054569362265e-05, "loss": 0.6229, "step": 1848, "task_loss": 0.8389408588409424 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4346604347229004, "epoch": 1.56, "learning_rate": 4.687235841081995e-05, "loss": 0.4942, "step": 1849, "task_loss": 1.1833844184875488 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.546808123588562, "epoch": 1.56, "learning_rate": 4.686766225227764e-05, "loss": 0.5042, "step": 1850, "task_loss": 0.5895398855209351 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.601993978023529, "epoch": 1.56, "learning_rate": 4.6862966093735324e-05, "loss": 0.5649, "step": 1851, "task_loss": 0.5142083764076233 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.49122998118400574, "epoch": 1.57, "learning_rate": 4.685826993519302e-05, "loss": 0.4124, "step": 1852, "task_loss": 0.7187750339508057 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6636379957199097, "epoch": 1.57, "learning_rate": 4.68535737766507e-05, "loss": 0.6097, "step": 1853, "task_loss": 1.213657259941101 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5201648473739624, "epoch": 1.57, "learning_rate": 4.684887761810839e-05, "loss": 0.6343, "step": 1854, "task_loss": 0.6964669823646545 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.525881826877594, "epoch": 1.57, "learning_rate": 4.6844181459566076e-05, "loss": 0.614, "step": 1855, "task_loss": 0.6494321227073669 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3271472454071045, "epoch": 1.57, "learning_rate": 4.683948530102377e-05, "loss": 0.6381, "step": 1856, "task_loss": 0.08460842072963715 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.474894255399704, "epoch": 1.57, "learning_rate": 4.683478914248145e-05, "loss": 0.5441, "step": 1857, "task_loss": 0.33298078179359436 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4968753457069397, "epoch": 1.57, "learning_rate": 4.683009298393914e-05, "loss": 0.4592, "step": 1858, "task_loss": 0.5779150724411011 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.0260851383209229, "epoch": 1.57, "learning_rate": 4.682539682539683e-05, "loss": 0.7933, "step": 1859, "task_loss": 1.6627287864685059 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.7906615138053894, "epoch": 1.57, "learning_rate": 4.6820700666854515e-05, "loss": 0.7098, "step": 1860, "task_loss": 2.013934373855591 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3578082323074341, "epoch": 1.57, "learning_rate": 4.681600450831221e-05, "loss": 0.5458, "step": 1861, "task_loss": 0.18244178593158722 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6916027069091797, "epoch": 1.57, "learning_rate": 4.681130834976989e-05, "loss": 0.6101, "step": 1862, "task_loss": 0.7993102073669434 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4502459764480591, "epoch": 1.57, "learning_rate": 4.680661219122758e-05, "loss": 0.592, "step": 1863, "task_loss": 0.2994005084037781 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5625342726707458, "epoch": 1.58, "learning_rate": 4.680191603268527e-05, "loss": 0.6211, "step": 1864, "task_loss": 1.1395446062088013 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6579430103302002, "epoch": 1.58, "learning_rate": 4.6797219874142953e-05, "loss": 0.8096, "step": 1865, "task_loss": 1.5599775314331055 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3917092978954315, "epoch": 1.58, "learning_rate": 4.679252371560064e-05, "loss": 0.5361, "step": 1866, "task_loss": 0.30034443736076355 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5206760168075562, "epoch": 1.58, "learning_rate": 4.6787827557058326e-05, "loss": 0.7454, "step": 1867, "task_loss": 1.0147730112075806 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5398136377334595, "epoch": 1.58, "learning_rate": 4.678313139851602e-05, "loss": 0.667, "step": 1868, "task_loss": 0.4181918799877167 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.42976731061935425, "epoch": 1.58, "learning_rate": 4.6778435239973706e-05, "loss": 0.693, "step": 1869, "task_loss": 0.8976526260375977 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.45502638816833496, "epoch": 1.58, "learning_rate": 4.677373908143139e-05, "loss": 0.4734, "step": 1870, "task_loss": 0.34420374035835266 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6993266344070435, "epoch": 1.58, "learning_rate": 4.676904292288908e-05, "loss": 0.6215, "step": 1871, "task_loss": 1.2022216320037842 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5353761911392212, "epoch": 1.58, "learning_rate": 4.6764346764346765e-05, "loss": 0.5871, "step": 1872, "task_loss": 0.3371061086654663 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.45115602016448975, "epoch": 1.58, "learning_rate": 4.675965060580446e-05, "loss": 0.4945, "step": 1873, "task_loss": 0.5973798632621765 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.439235657453537, "epoch": 1.58, "learning_rate": 4.675495444726214e-05, "loss": 0.6803, "step": 1874, "task_loss": 0.7925198078155518 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6705981492996216, "epoch": 1.58, "learning_rate": 4.675025828871983e-05, "loss": 0.5552, "step": 1875, "task_loss": 0.7377611398696899 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3314593434333801, "epoch": 1.59, "learning_rate": 4.674556213017752e-05, "loss": 0.5198, "step": 1876, "task_loss": 0.3796609342098236 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.25870436429977417, "epoch": 1.59, "learning_rate": 4.67408659716352e-05, "loss": 0.4509, "step": 1877, "task_loss": 0.415881872177124 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3855830430984497, "epoch": 1.59, "learning_rate": 4.6736169813092896e-05, "loss": 0.7109, "step": 1878, "task_loss": 0.44345542788505554 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.9690287113189697, "epoch": 1.59, "learning_rate": 4.6731473654550576e-05, "loss": 0.6253, "step": 1879, "task_loss": 0.693649172782898 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.9196126461029053, "epoch": 1.59, "learning_rate": 4.672677749600827e-05, "loss": 0.7093, "step": 1880, "task_loss": 1.3152531385421753 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.7723612785339355, "epoch": 1.59, "learning_rate": 4.6722081337465955e-05, "loss": 0.5864, "step": 1881, "task_loss": 0.8921085000038147 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4764910042285919, "epoch": 1.59, "learning_rate": 4.671738517892365e-05, "loss": 0.4743, "step": 1882, "task_loss": 0.16368448734283447 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5067263841629028, "epoch": 1.59, "learning_rate": 4.671268902038133e-05, "loss": 0.5572, "step": 1883, "task_loss": 0.7184029221534729 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3935500681400299, "epoch": 1.59, "learning_rate": 4.6707992861839014e-05, "loss": 0.5763, "step": 1884, "task_loss": 0.6569693088531494 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4811840355396271, "epoch": 1.59, "learning_rate": 4.670329670329671e-05, "loss": 0.6015, "step": 1885, "task_loss": 0.6897271871566772 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4812799394130707, "epoch": 1.59, "learning_rate": 4.6698600544754394e-05, "loss": 0.6574, "step": 1886, "task_loss": 0.6717453598976135 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5927145481109619, "epoch": 1.59, "learning_rate": 4.669390438621208e-05, "loss": 0.5827, "step": 1887, "task_loss": 0.7006586790084839 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6654424667358398, "epoch": 1.6, "learning_rate": 4.6689208227669766e-05, "loss": 0.6126, "step": 1888, "task_loss": 1.265397071838379 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.7272031307220459, "epoch": 1.6, "learning_rate": 4.668451206912746e-05, "loss": 0.5902, "step": 1889, "task_loss": 0.7466819286346436 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.47081542015075684, "epoch": 1.6, "learning_rate": 4.6679815910585146e-05, "loss": 0.7193, "step": 1890, "task_loss": 1.7209584712982178 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.7862969636917114, "epoch": 1.6, "learning_rate": 4.6675119752042825e-05, "loss": 0.5854, "step": 1891, "task_loss": 0.9786199331283569 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5097280144691467, "epoch": 1.6, "learning_rate": 4.667042359350052e-05, "loss": 0.4389, "step": 1892, "task_loss": 0.30379530787467957 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6733946800231934, "epoch": 1.6, "learning_rate": 4.6665727434958205e-05, "loss": 0.6607, "step": 1893, "task_loss": 1.7081656455993652 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.27515310049057007, "epoch": 1.6, "learning_rate": 4.66610312764159e-05, "loss": 0.5711, "step": 1894, "task_loss": 0.34513160586357117 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5071406364440918, "epoch": 1.6, "learning_rate": 4.6656335117873584e-05, "loss": 0.4801, "step": 1895, "task_loss": 1.2544324398040771 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.37362948060035706, "epoch": 1.6, "learning_rate": 4.665163895933127e-05, "loss": 0.5747, "step": 1896, "task_loss": 0.9745263457298279 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.8012465834617615, "epoch": 1.6, "learning_rate": 4.664694280078896e-05, "loss": 0.6202, "step": 1897, "task_loss": 1.1787583827972412 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2249249815940857, "epoch": 1.6, "learning_rate": 4.664224664224664e-05, "loss": 0.4416, "step": 1898, "task_loss": 0.43031370639801025 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4262726902961731, "epoch": 1.6, "learning_rate": 4.6637550483704336e-05, "loss": 0.4927, "step": 1899, "task_loss": 0.5710815787315369 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.47875678539276123, "epoch": 1.61, "learning_rate": 4.6632854325162016e-05, "loss": 0.4549, "step": 1900, "task_loss": 0.4836062788963318 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5776896476745605, "epoch": 1.61, "learning_rate": 4.662815816661971e-05, "loss": 0.5184, "step": 1901, "task_loss": 1.021346926689148 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2448888123035431, "epoch": 1.61, "learning_rate": 4.6623462008077395e-05, "loss": 0.5209, "step": 1902, "task_loss": 0.2282993495464325 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5127319097518921, "epoch": 1.61, "learning_rate": 4.661876584953508e-05, "loss": 0.7279, "step": 1903, "task_loss": 0.6702991127967834 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4277348518371582, "epoch": 1.61, "learning_rate": 4.661406969099277e-05, "loss": 0.4922, "step": 1904, "task_loss": 0.6962489485740662 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.35797202587127686, "epoch": 1.61, "learning_rate": 4.6609373532450454e-05, "loss": 0.5921, "step": 1905, "task_loss": 0.7855871915817261 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.59946209192276, "epoch": 1.61, "learning_rate": 4.660467737390815e-05, "loss": 0.54, "step": 1906, "task_loss": 1.3011976480484009 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.9130920171737671, "epoch": 1.61, "learning_rate": 4.6599981215365834e-05, "loss": 0.7217, "step": 1907, "task_loss": 0.9444011449813843 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6050752401351929, "epoch": 1.61, "learning_rate": 4.659528505682352e-05, "loss": 0.5771, "step": 1908, "task_loss": 0.7212833166122437 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5063422918319702, "epoch": 1.61, "learning_rate": 4.6590588898281207e-05, "loss": 0.8909, "step": 1909, "task_loss": 0.2792868912220001 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.38653329014778137, "epoch": 1.61, "learning_rate": 4.658589273973889e-05, "loss": 0.4395, "step": 1910, "task_loss": 0.9186563491821289 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5764357447624207, "epoch": 1.61, "learning_rate": 4.6581196581196586e-05, "loss": 0.4691, "step": 1911, "task_loss": 1.0853201150894165 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6206574440002441, "epoch": 1.62, "learning_rate": 4.657650042265427e-05, "loss": 0.5765, "step": 1912, "task_loss": 1.0348291397094727 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.44312185049057007, "epoch": 1.62, "learning_rate": 4.657180426411196e-05, "loss": 0.593, "step": 1913, "task_loss": 0.6225569844245911 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6333627700805664, "epoch": 1.62, "learning_rate": 4.6567108105569645e-05, "loss": 0.6026, "step": 1914, "task_loss": 0.640951931476593 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5990137457847595, "epoch": 1.62, "learning_rate": 4.656241194702733e-05, "loss": 0.5053, "step": 1915, "task_loss": 0.6408780813217163 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4648085832595825, "epoch": 1.62, "learning_rate": 4.6557715788485025e-05, "loss": 0.5069, "step": 1916, "task_loss": 0.8815034627914429 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4059109091758728, "epoch": 1.62, "learning_rate": 4.6553019629942704e-05, "loss": 0.5666, "step": 1917, "task_loss": 0.3956148326396942 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4059675931930542, "epoch": 1.62, "learning_rate": 4.65483234714004e-05, "loss": 0.555, "step": 1918, "task_loss": 0.5022497177124023 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5159574747085571, "epoch": 1.62, "learning_rate": 4.6543627312858084e-05, "loss": 0.5716, "step": 1919, "task_loss": 0.4647010862827301 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.740647554397583, "epoch": 1.62, "learning_rate": 4.653893115431578e-05, "loss": 0.5379, "step": 1920, "task_loss": 0.8348552584648132 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.7009769678115845, "epoch": 1.62, "learning_rate": 4.6534234995773456e-05, "loss": 0.5971, "step": 1921, "task_loss": 1.73435640335083 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5020410418510437, "epoch": 1.62, "learning_rate": 4.652953883723114e-05, "loss": 0.4729, "step": 1922, "task_loss": 0.7718576788902283 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.36021140217781067, "epoch": 1.63, "learning_rate": 4.6524842678688836e-05, "loss": 0.5819, "step": 1923, "task_loss": 0.30075839161872864 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3421124815940857, "epoch": 1.63, "learning_rate": 4.652014652014652e-05, "loss": 0.3706, "step": 1924, "task_loss": 0.6893089413642883 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.42204713821411133, "epoch": 1.63, "learning_rate": 4.6515450361604215e-05, "loss": 0.5015, "step": 1925, "task_loss": 1.145038366317749 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5465960502624512, "epoch": 1.63, "learning_rate": 4.6510754203061895e-05, "loss": 0.6289, "step": 1926, "task_loss": 0.8248675465583801 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.35700979828834534, "epoch": 1.63, "learning_rate": 4.650605804451959e-05, "loss": 0.4823, "step": 1927, "task_loss": 0.4545920789241791 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4961574673652649, "epoch": 1.63, "learning_rate": 4.6501361885977274e-05, "loss": 0.7004, "step": 1928, "task_loss": 1.551714301109314 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.42916354537010193, "epoch": 1.63, "learning_rate": 4.649666572743496e-05, "loss": 0.6647, "step": 1929, "task_loss": 0.3034524619579315 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6383439302444458, "epoch": 1.63, "learning_rate": 4.649196956889265e-05, "loss": 0.6976, "step": 1930, "task_loss": 0.6374261379241943 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5258362293243408, "epoch": 1.63, "learning_rate": 4.648727341035033e-05, "loss": 0.6222, "step": 1931, "task_loss": 2.011550188064575 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.646300196647644, "epoch": 1.63, "learning_rate": 4.6482577251808026e-05, "loss": 0.6566, "step": 1932, "task_loss": 0.49180835485458374 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6027108430862427, "epoch": 1.63, "learning_rate": 4.647788109326571e-05, "loss": 0.5654, "step": 1933, "task_loss": 0.9724689722061157 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4280605912208557, "epoch": 1.63, "learning_rate": 4.64731849347234e-05, "loss": 0.5269, "step": 1934, "task_loss": 0.2854591906070709 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.448786199092865, "epoch": 1.64, "learning_rate": 4.6468488776181085e-05, "loss": 0.512, "step": 1935, "task_loss": 1.0266319513320923 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.764808177947998, "epoch": 1.64, "learning_rate": 4.646379261763877e-05, "loss": 0.5364, "step": 1936, "task_loss": 0.5424794554710388 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5813546180725098, "epoch": 1.64, "learning_rate": 4.6459096459096465e-05, "loss": 0.6073, "step": 1937, "task_loss": 0.4363210201263428 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.34104102849960327, "epoch": 1.64, "learning_rate": 4.645440030055415e-05, "loss": 0.5936, "step": 1938, "task_loss": 0.6663821339607239 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.9777122139930725, "epoch": 1.64, "learning_rate": 4.644970414201184e-05, "loss": 0.6662, "step": 1939, "task_loss": 1.4371451139450073 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4519270360469818, "epoch": 1.64, "learning_rate": 4.6445007983469524e-05, "loss": 0.4886, "step": 1940, "task_loss": 0.6666457653045654 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.40716052055358887, "epoch": 1.64, "learning_rate": 4.644031182492721e-05, "loss": 0.4942, "step": 1941, "task_loss": 0.7209877967834473 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.443619966506958, "epoch": 1.64, "learning_rate": 4.64356156663849e-05, "loss": 0.6136, "step": 1942, "task_loss": 0.7305147647857666 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3493501543998718, "epoch": 1.64, "learning_rate": 4.643091950784258e-05, "loss": 0.5634, "step": 1943, "task_loss": 0.884074866771698 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5071278810501099, "epoch": 1.64, "learning_rate": 4.6426223349300276e-05, "loss": 0.5124, "step": 1944, "task_loss": 0.8684453964233398 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.367328941822052, "epoch": 1.64, "learning_rate": 4.642152719075796e-05, "loss": 0.6858, "step": 1945, "task_loss": 0.4670770466327667 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6910357475280762, "epoch": 1.64, "learning_rate": 4.641683103221565e-05, "loss": 0.6521, "step": 1946, "task_loss": 1.6614556312561035 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3436511158943176, "epoch": 1.65, "learning_rate": 4.6412134873673335e-05, "loss": 0.5202, "step": 1947, "task_loss": 0.6699879169464111 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.7080458402633667, "epoch": 1.65, "learning_rate": 4.640743871513102e-05, "loss": 0.7517, "step": 1948, "task_loss": 1.3655600547790527 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.7631797194480896, "epoch": 1.65, "learning_rate": 4.6402742556588714e-05, "loss": 0.6153, "step": 1949, "task_loss": 1.3300447463989258 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.36314499378204346, "epoch": 1.65, "learning_rate": 4.63980463980464e-05, "loss": 0.6187, "step": 1950, "task_loss": 0.6681150794029236 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.37765100598335266, "epoch": 1.65, "learning_rate": 4.639335023950409e-05, "loss": 0.5002, "step": 1951, "task_loss": 1.1149048805236816 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3839151859283447, "epoch": 1.65, "learning_rate": 4.6388654080961773e-05, "loss": 0.5777, "step": 1952, "task_loss": 0.4816952347755432 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5346459746360779, "epoch": 1.65, "learning_rate": 4.6383957922419467e-05, "loss": 0.5784, "step": 1953, "task_loss": 0.37667304277420044 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5754196643829346, "epoch": 1.65, "learning_rate": 4.637926176387715e-05, "loss": 0.6845, "step": 1954, "task_loss": 0.7497438788414001 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.46790409088134766, "epoch": 1.65, "learning_rate": 4.637456560533484e-05, "loss": 0.699, "step": 1955, "task_loss": 1.074196457862854 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4146980345249176, "epoch": 1.65, "learning_rate": 4.6369869446792526e-05, "loss": 0.4743, "step": 1956, "task_loss": 0.20992335677146912 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.7312346696853638, "epoch": 1.65, "learning_rate": 4.636517328825021e-05, "loss": 0.6351, "step": 1957, "task_loss": 0.7967596650123596 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5554275512695312, "epoch": 1.65, "learning_rate": 4.6360477129707905e-05, "loss": 0.5177, "step": 1958, "task_loss": 0.567518949508667 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.42225921154022217, "epoch": 1.66, "learning_rate": 4.635578097116559e-05, "loss": 0.4947, "step": 1959, "task_loss": 0.5452908277511597 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.7208248376846313, "epoch": 1.66, "learning_rate": 4.635108481262328e-05, "loss": 0.5922, "step": 1960, "task_loss": 0.41331160068511963 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.352708101272583, "epoch": 1.66, "learning_rate": 4.6346388654080964e-05, "loss": 0.3642, "step": 1961, "task_loss": 0.5269802808761597 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2675037086009979, "epoch": 1.66, "learning_rate": 4.634169249553865e-05, "loss": 0.4845, "step": 1962, "task_loss": 0.12757937610149384 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.711147129535675, "epoch": 1.66, "learning_rate": 4.6336996336996343e-05, "loss": 0.4719, "step": 1963, "task_loss": 0.1511716991662979 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4818832278251648, "epoch": 1.66, "learning_rate": 4.633230017845402e-05, "loss": 0.5088, "step": 1964, "task_loss": 0.7447141408920288 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.46119505167007446, "epoch": 1.66, "learning_rate": 4.6327604019911716e-05, "loss": 0.4088, "step": 1965, "task_loss": 0.6016743183135986 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.49571090936660767, "epoch": 1.66, "learning_rate": 4.63229078613694e-05, "loss": 0.5104, "step": 1966, "task_loss": 0.8246663212776184 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6447635889053345, "epoch": 1.66, "learning_rate": 4.631821170282709e-05, "loss": 0.5409, "step": 1967, "task_loss": 1.0070780515670776 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4650660753250122, "epoch": 1.66, "learning_rate": 4.6313515544284775e-05, "loss": 0.4314, "step": 1968, "task_loss": 0.7126833200454712 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5618286728858948, "epoch": 1.66, "learning_rate": 4.630881938574246e-05, "loss": 0.3939, "step": 1969, "task_loss": 0.2841024696826935 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5228209495544434, "epoch": 1.66, "learning_rate": 4.6304123227200155e-05, "loss": 0.6358, "step": 1970, "task_loss": 0.3963019549846649 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.32013314962387085, "epoch": 1.67, "learning_rate": 4.629942706865784e-05, "loss": 0.5701, "step": 1971, "task_loss": 1.4695099592208862 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.48744869232177734, "epoch": 1.67, "learning_rate": 4.629473091011553e-05, "loss": 0.512, "step": 1972, "task_loss": 0.9898286461830139 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6371152997016907, "epoch": 1.67, "learning_rate": 4.6290034751573214e-05, "loss": 0.5826, "step": 1973, "task_loss": 0.8348256349563599 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5498768091201782, "epoch": 1.67, "learning_rate": 4.62853385930309e-05, "loss": 0.5286, "step": 1974, "task_loss": 0.9081346392631531 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.8800171613693237, "epoch": 1.67, "learning_rate": 4.628064243448859e-05, "loss": 0.4921, "step": 1975, "task_loss": 0.9750813245773315 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.40541839599609375, "epoch": 1.67, "learning_rate": 4.627594627594628e-05, "loss": 0.5828, "step": 1976, "task_loss": 1.143312692642212 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.43513357639312744, "epoch": 1.67, "learning_rate": 4.6271250117403966e-05, "loss": 0.584, "step": 1977, "task_loss": 1.2302149534225464 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.30312874913215637, "epoch": 1.67, "learning_rate": 4.626655395886165e-05, "loss": 0.5572, "step": 1978, "task_loss": 0.867306113243103 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.584494948387146, "epoch": 1.67, "learning_rate": 4.626185780031934e-05, "loss": 0.5211, "step": 1979, "task_loss": 1.49024498462677 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.39544039964675903, "epoch": 1.67, "learning_rate": 4.625716164177703e-05, "loss": 0.4817, "step": 1980, "task_loss": 0.36360305547714233 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.559738039970398, "epoch": 1.67, "learning_rate": 4.625246548323471e-05, "loss": 0.4825, "step": 1981, "task_loss": 0.8233970999717712 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.42535340785980225, "epoch": 1.67, "learning_rate": 4.6247769324692404e-05, "loss": 0.4357, "step": 1982, "task_loss": 1.2819576263427734 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2912988066673279, "epoch": 1.68, "learning_rate": 4.624307316615009e-05, "loss": 0.5389, "step": 1983, "task_loss": 0.53116375207901 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6343371272087097, "epoch": 1.68, "learning_rate": 4.6238377007607784e-05, "loss": 0.5888, "step": 1984, "task_loss": 0.6254086494445801 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5136388540267944, "epoch": 1.68, "learning_rate": 4.623368084906547e-05, "loss": 0.5854, "step": 1985, "task_loss": 0.2664932906627655 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.36403971910476685, "epoch": 1.68, "learning_rate": 4.622898469052315e-05, "loss": 0.5995, "step": 1986, "task_loss": 0.702233076095581 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6623230576515198, "epoch": 1.68, "learning_rate": 4.622428853198084e-05, "loss": 0.6526, "step": 1987, "task_loss": 1.7313801050186157 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.7548041343688965, "epoch": 1.68, "learning_rate": 4.621959237343853e-05, "loss": 0.647, "step": 1988, "task_loss": 1.599843144416809 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.7858583927154541, "epoch": 1.68, "learning_rate": 4.621489621489622e-05, "loss": 0.5625, "step": 1989, "task_loss": 0.6673519611358643 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6496902108192444, "epoch": 1.68, "learning_rate": 4.62102000563539e-05, "loss": 0.6132, "step": 1990, "task_loss": 0.9289672374725342 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.24163088202476501, "epoch": 1.68, "learning_rate": 4.6205503897811595e-05, "loss": 0.4131, "step": 1991, "task_loss": 0.7417337894439697 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.39544031023979187, "epoch": 1.68, "learning_rate": 4.620080773926928e-05, "loss": 0.5012, "step": 1992, "task_loss": 0.4768454432487488 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.24969041347503662, "epoch": 1.68, "learning_rate": 4.619611158072697e-05, "loss": 0.4169, "step": 1993, "task_loss": 0.6415157914161682 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.44749224185943604, "epoch": 1.69, "learning_rate": 4.6191415422184654e-05, "loss": 0.5288, "step": 1994, "task_loss": 1.1452547311782837 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.47349682450294495, "epoch": 1.69, "learning_rate": 4.618671926364234e-05, "loss": 0.4517, "step": 1995, "task_loss": 0.12785297632217407 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5869649648666382, "epoch": 1.69, "learning_rate": 4.618202310510003e-05, "loss": 0.5492, "step": 1996, "task_loss": 0.4930233359336853 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4729825258255005, "epoch": 1.69, "learning_rate": 4.617732694655772e-05, "loss": 0.6533, "step": 1997, "task_loss": 0.737528920173645 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.704756498336792, "epoch": 1.69, "learning_rate": 4.6172630788015406e-05, "loss": 0.6278, "step": 1998, "task_loss": 1.0836979150772095 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.39312201738357544, "epoch": 1.69, "learning_rate": 4.616793462947309e-05, "loss": 0.4108, "step": 1999, "task_loss": 0.3622550070285797 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4303606450557709, "epoch": 1.69, "learning_rate": 4.616323847093078e-05, "loss": 0.5272, "step": 2000, "task_loss": 0.18537604808807373 }, { "epoch": 1.69, "eval_accuracy": 0.904990099009901, "eval_loss": 0.33097419142723083, "eval_runtime": 227.7003, "eval_samples_per_second": 110.891, "eval_steps_per_second": 0.87, "step": 2000 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6591658592224121, "epoch": 1.69, "learning_rate": 4.615854231238847e-05, "loss": 0.4892, "step": 2001, "task_loss": 0.21459172666072845 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4866146147251129, "epoch": 1.69, "learning_rate": 4.615384615384616e-05, "loss": 0.5009, "step": 2002, "task_loss": 0.181616872549057 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.47221171855926514, "epoch": 1.69, "learning_rate": 4.6149149995303844e-05, "loss": 0.6112, "step": 2003, "task_loss": 0.7320463061332703 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5193889141082764, "epoch": 1.69, "learning_rate": 4.614445383676153e-05, "loss": 0.4626, "step": 2004, "task_loss": 0.6524899005889893 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6947740912437439, "epoch": 1.69, "learning_rate": 4.613975767821922e-05, "loss": 0.5746, "step": 2005, "task_loss": 0.37834760546684265 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.643571138381958, "epoch": 1.7, "learning_rate": 4.613506151967691e-05, "loss": 0.7315, "step": 2006, "task_loss": 2.4295802116394043 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.324611634016037, "epoch": 1.7, "learning_rate": 4.613036536113459e-05, "loss": 0.4947, "step": 2007, "task_loss": 0.06080076843500137 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.9815917015075684, "epoch": 1.7, "learning_rate": 4.612566920259228e-05, "loss": 0.6428, "step": 2008, "task_loss": 1.59491765499115 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4104413390159607, "epoch": 1.7, "learning_rate": 4.612097304404997e-05, "loss": 0.4605, "step": 2009, "task_loss": 0.21556295454502106 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.46001192927360535, "epoch": 1.7, "learning_rate": 4.6116276885507656e-05, "loss": 0.6466, "step": 2010, "task_loss": 1.0571357011795044 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.462877482175827, "epoch": 1.7, "learning_rate": 4.611158072696534e-05, "loss": 0.6265, "step": 2011, "task_loss": 0.43202754855155945 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.327553391456604, "epoch": 1.7, "learning_rate": 4.610688456842303e-05, "loss": 0.366, "step": 2012, "task_loss": 0.4694058895111084 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5055482387542725, "epoch": 1.7, "learning_rate": 4.610218840988072e-05, "loss": 0.4806, "step": 2013, "task_loss": 1.1611257791519165 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.45444104075431824, "epoch": 1.7, "learning_rate": 4.609749225133841e-05, "loss": 0.4351, "step": 2014, "task_loss": 0.48207858204841614 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5387264490127563, "epoch": 1.7, "learning_rate": 4.6092796092796094e-05, "loss": 0.5912, "step": 2015, "task_loss": 0.5317508578300476 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5355345010757446, "epoch": 1.7, "learning_rate": 4.608809993425378e-05, "loss": 0.554, "step": 2016, "task_loss": 1.2317243814468384 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5639587044715881, "epoch": 1.7, "learning_rate": 4.608340377571147e-05, "loss": 0.6746, "step": 2017, "task_loss": 2.0961287021636963 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4055578410625458, "epoch": 1.71, "learning_rate": 4.607870761716916e-05, "loss": 0.5017, "step": 2018, "task_loss": 0.9728465676307678 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.46916595101356506, "epoch": 1.71, "learning_rate": 4.6074011458626846e-05, "loss": 0.5937, "step": 2019, "task_loss": 0.7556138038635254 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5292918086051941, "epoch": 1.71, "learning_rate": 4.606931530008453e-05, "loss": 0.5846, "step": 2020, "task_loss": 0.9846111536026001 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4615810215473175, "epoch": 1.71, "learning_rate": 4.606461914154222e-05, "loss": 0.5004, "step": 2021, "task_loss": 0.62034010887146 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.62611985206604, "epoch": 1.71, "learning_rate": 4.605992298299991e-05, "loss": 0.4975, "step": 2022, "task_loss": 0.2502720355987549 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.583138108253479, "epoch": 1.71, "learning_rate": 4.60552268244576e-05, "loss": 0.4532, "step": 2023, "task_loss": 2.597763776779175 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6877407431602478, "epoch": 1.71, "learning_rate": 4.605053066591528e-05, "loss": 0.6673, "step": 2024, "task_loss": 0.628801167011261 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.7325685620307922, "epoch": 1.71, "learning_rate": 4.604583450737297e-05, "loss": 0.7999, "step": 2025, "task_loss": 1.5668489933013916 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3492036759853363, "epoch": 1.71, "learning_rate": 4.604113834883066e-05, "loss": 0.4446, "step": 2026, "task_loss": 0.6551200747489929 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6214760541915894, "epoch": 1.71, "learning_rate": 4.603644219028835e-05, "loss": 0.5697, "step": 2027, "task_loss": 1.3091659545898438 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.45524364709854126, "epoch": 1.71, "learning_rate": 4.603174603174603e-05, "loss": 0.5027, "step": 2028, "task_loss": 0.3164771795272827 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5005728006362915, "epoch": 1.71, "learning_rate": 4.602704987320372e-05, "loss": 0.5163, "step": 2029, "task_loss": 0.3878321349620819 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.41635948419570923, "epoch": 1.72, "learning_rate": 4.602235371466141e-05, "loss": 0.6506, "step": 2030, "task_loss": 0.7554118037223816 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5966565608978271, "epoch": 1.72, "learning_rate": 4.6017657556119096e-05, "loss": 0.6486, "step": 2031, "task_loss": 0.6373807191848755 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5056440830230713, "epoch": 1.72, "learning_rate": 4.601296139757679e-05, "loss": 0.4492, "step": 2032, "task_loss": 0.23589545488357544 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5700172185897827, "epoch": 1.72, "learning_rate": 4.600826523903447e-05, "loss": 0.5041, "step": 2033, "task_loss": 1.4903006553649902 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.41189807653427124, "epoch": 1.72, "learning_rate": 4.600356908049216e-05, "loss": 0.431, "step": 2034, "task_loss": 0.8087654709815979 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4895195960998535, "epoch": 1.72, "learning_rate": 4.599887292194985e-05, "loss": 0.5135, "step": 2035, "task_loss": 0.581380307674408 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5028020739555359, "epoch": 1.72, "learning_rate": 4.5994176763407534e-05, "loss": 0.5303, "step": 2036, "task_loss": 0.8911030292510986 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.494489848613739, "epoch": 1.72, "learning_rate": 4.598948060486522e-05, "loss": 0.5289, "step": 2037, "task_loss": 1.2926629781723022 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.8407003879547119, "epoch": 1.72, "learning_rate": 4.598478444632291e-05, "loss": 0.6085, "step": 2038, "task_loss": 1.1930837631225586 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4640425145626068, "epoch": 1.72, "learning_rate": 4.59800882877806e-05, "loss": 0.6198, "step": 2039, "task_loss": 0.4546608328819275 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.511309802532196, "epoch": 1.72, "learning_rate": 4.5975392129238286e-05, "loss": 0.5555, "step": 2040, "task_loss": 0.8773356676101685 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4771813154220581, "epoch": 1.72, "learning_rate": 4.597069597069597e-05, "loss": 0.512, "step": 2041, "task_loss": 0.5802324414253235 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5402620434761047, "epoch": 1.73, "learning_rate": 4.596599981215366e-05, "loss": 0.6719, "step": 2042, "task_loss": 0.3395357131958008 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3940516710281372, "epoch": 1.73, "learning_rate": 4.5961303653611346e-05, "loss": 0.3756, "step": 2043, "task_loss": 0.8970284461975098 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.423417866230011, "epoch": 1.73, "learning_rate": 4.595660749506904e-05, "loss": 0.7119, "step": 2044, "task_loss": 0.757098913192749 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.35032856464385986, "epoch": 1.73, "learning_rate": 4.595191133652672e-05, "loss": 0.4584, "step": 2045, "task_loss": 1.466117024421692 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.25562119483947754, "epoch": 1.73, "learning_rate": 4.594721517798441e-05, "loss": 0.5246, "step": 2046, "task_loss": 0.350289523601532 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3606404662132263, "epoch": 1.73, "learning_rate": 4.59425190194421e-05, "loss": 0.4952, "step": 2047, "task_loss": 1.1569145917892456 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3920883536338806, "epoch": 1.73, "learning_rate": 4.593782286089979e-05, "loss": 0.4757, "step": 2048, "task_loss": 0.47120994329452515 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.52159184217453, "epoch": 1.73, "learning_rate": 4.593312670235748e-05, "loss": 0.4805, "step": 2049, "task_loss": 1.0600637197494507 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6532259583473206, "epoch": 1.73, "learning_rate": 4.592843054381516e-05, "loss": 0.7079, "step": 2050, "task_loss": 0.5629146099090576 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5053825974464417, "epoch": 1.73, "learning_rate": 4.592373438527285e-05, "loss": 0.5325, "step": 2051, "task_loss": 0.24431300163269043 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3835293650627136, "epoch": 1.73, "learning_rate": 4.5919038226730536e-05, "loss": 0.605, "step": 2052, "task_loss": 0.8664827346801758 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4695228338241577, "epoch": 1.73, "learning_rate": 4.591434206818823e-05, "loss": 0.4481, "step": 2053, "task_loss": 0.5434091687202454 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3437801003456116, "epoch": 1.74, "learning_rate": 4.590964590964591e-05, "loss": 0.5702, "step": 2054, "task_loss": 0.17606034874916077 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.691542387008667, "epoch": 1.74, "learning_rate": 4.59049497511036e-05, "loss": 0.5252, "step": 2055, "task_loss": 1.209094524383545 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5686816573143005, "epoch": 1.74, "learning_rate": 4.590025359256129e-05, "loss": 0.5176, "step": 2056, "task_loss": 0.7480659484863281 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4564830958843231, "epoch": 1.74, "learning_rate": 4.5895557434018975e-05, "loss": 0.5654, "step": 2057, "task_loss": 1.0726795196533203 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4620174765586853, "epoch": 1.74, "learning_rate": 4.589086127547666e-05, "loss": 0.4045, "step": 2058, "task_loss": 0.7516773343086243 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5002691149711609, "epoch": 1.74, "learning_rate": 4.588616511693435e-05, "loss": 0.4714, "step": 2059, "task_loss": 1.3765820264816284 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3602658808231354, "epoch": 1.74, "learning_rate": 4.588146895839204e-05, "loss": 0.4749, "step": 2060, "task_loss": 0.10768648236989975 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.27802640199661255, "epoch": 1.74, "learning_rate": 4.587677279984973e-05, "loss": 0.5232, "step": 2061, "task_loss": 0.1309250295162201 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6688908934593201, "epoch": 1.74, "learning_rate": 4.587207664130741e-05, "loss": 0.5011, "step": 2062, "task_loss": 0.808330774307251 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4703649878501892, "epoch": 1.74, "learning_rate": 4.58673804827651e-05, "loss": 0.3553, "step": 2063, "task_loss": 0.040938060730695724 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4743626117706299, "epoch": 1.74, "learning_rate": 4.5862684324222786e-05, "loss": 0.4116, "step": 2064, "task_loss": 0.8179786205291748 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.675616979598999, "epoch": 1.75, "learning_rate": 4.585798816568048e-05, "loss": 0.4769, "step": 2065, "task_loss": 0.4578036069869995 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.8712598085403442, "epoch": 1.75, "learning_rate": 4.5853292007138165e-05, "loss": 0.7874, "step": 2066, "task_loss": 0.7406539916992188 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6521067023277283, "epoch": 1.75, "learning_rate": 4.584859584859585e-05, "loss": 0.6392, "step": 2067, "task_loss": 1.0126603841781616 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.2566328048706055, "epoch": 1.75, "learning_rate": 4.584389969005354e-05, "loss": 0.7131, "step": 2068, "task_loss": 1.0378272533416748 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.36832600831985474, "epoch": 1.75, "learning_rate": 4.5839203531511224e-05, "loss": 0.4704, "step": 2069, "task_loss": 0.841235876083374 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5462263822555542, "epoch": 1.75, "learning_rate": 4.583450737296892e-05, "loss": 0.6086, "step": 2070, "task_loss": 0.0827615037560463 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.22516904771327972, "epoch": 1.75, "learning_rate": 4.58298112144266e-05, "loss": 0.433, "step": 2071, "task_loss": 0.16937336325645447 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6932924389839172, "epoch": 1.75, "learning_rate": 4.582511505588429e-05, "loss": 0.6002, "step": 2072, "task_loss": 0.0861826241016388 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3899523615837097, "epoch": 1.75, "learning_rate": 4.5820418897341976e-05, "loss": 0.6182, "step": 2073, "task_loss": 0.298385888338089 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.36478549242019653, "epoch": 1.75, "learning_rate": 4.581572273879966e-05, "loss": 0.4347, "step": 2074, "task_loss": 0.39235949516296387 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.617392897605896, "epoch": 1.75, "learning_rate": 4.581102658025735e-05, "loss": 0.4981, "step": 2075, "task_loss": 1.016727328300476 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4272981286048889, "epoch": 1.75, "learning_rate": 4.5806330421715035e-05, "loss": 0.3981, "step": 2076, "task_loss": 0.13504400849342346 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2955678105354309, "epoch": 1.76, "learning_rate": 4.580163426317273e-05, "loss": 0.3571, "step": 2077, "task_loss": 0.12647226452827454 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4585294723510742, "epoch": 1.76, "learning_rate": 4.5796938104630415e-05, "loss": 0.4787, "step": 2078, "task_loss": 0.5914861559867859 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3602704107761383, "epoch": 1.76, "learning_rate": 4.579224194608811e-05, "loss": 0.5521, "step": 2079, "task_loss": 0.1589622050523758 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.35574787855148315, "epoch": 1.76, "learning_rate": 4.578754578754579e-05, "loss": 0.4896, "step": 2080, "task_loss": 0.8386611342430115 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5386511087417603, "epoch": 1.76, "learning_rate": 4.5782849629003474e-05, "loss": 0.6548, "step": 2081, "task_loss": 0.7850254774093628 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.7446581125259399, "epoch": 1.76, "learning_rate": 4.577815347046117e-05, "loss": 0.6338, "step": 2082, "task_loss": 0.7763950824737549 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5751662850379944, "epoch": 1.76, "learning_rate": 4.577345731191885e-05, "loss": 0.5322, "step": 2083, "task_loss": 2.5131635665893555 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.34754621982574463, "epoch": 1.76, "learning_rate": 4.576876115337654e-05, "loss": 0.4211, "step": 2084, "task_loss": 0.1351654976606369 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.42446279525756836, "epoch": 1.76, "learning_rate": 4.5764064994834226e-05, "loss": 0.5849, "step": 2085, "task_loss": 2.179248809814453 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5092846155166626, "epoch": 1.76, "learning_rate": 4.575936883629192e-05, "loss": 0.5062, "step": 2086, "task_loss": 0.4836285710334778 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2724847197532654, "epoch": 1.76, "learning_rate": 4.5754672677749605e-05, "loss": 0.3913, "step": 2087, "task_loss": 0.23720763623714447 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6237320899963379, "epoch": 1.76, "learning_rate": 4.5749976519207285e-05, "loss": 0.5447, "step": 2088, "task_loss": 0.35276830196380615 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5783926844596863, "epoch": 1.77, "learning_rate": 4.574528036066498e-05, "loss": 0.6048, "step": 2089, "task_loss": 1.3933990001678467 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4360746145248413, "epoch": 1.77, "learning_rate": 4.5740584202122664e-05, "loss": 0.598, "step": 2090, "task_loss": 0.9829727411270142 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3990347683429718, "epoch": 1.77, "learning_rate": 4.573588804358036e-05, "loss": 0.6707, "step": 2091, "task_loss": 0.6798000335693359 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2960702180862427, "epoch": 1.77, "learning_rate": 4.573119188503804e-05, "loss": 0.5829, "step": 2092, "task_loss": 0.5920838713645935 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6754752993583679, "epoch": 1.77, "learning_rate": 4.572649572649573e-05, "loss": 0.5883, "step": 2093, "task_loss": 0.9752802848815918 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5618507266044617, "epoch": 1.77, "learning_rate": 4.5721799567953417e-05, "loss": 0.5188, "step": 2094, "task_loss": 0.8732621669769287 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5569792985916138, "epoch": 1.77, "learning_rate": 4.57171034094111e-05, "loss": 0.5445, "step": 2095, "task_loss": 0.9643384218215942 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.31552353501319885, "epoch": 1.77, "learning_rate": 4.5712407250868796e-05, "loss": 0.3805, "step": 2096, "task_loss": 1.038280725479126 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.583950400352478, "epoch": 1.77, "learning_rate": 4.5707711092326476e-05, "loss": 0.5644, "step": 2097, "task_loss": 1.3596307039260864 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.23062486946582794, "epoch": 1.77, "learning_rate": 4.570301493378417e-05, "loss": 0.4612, "step": 2098, "task_loss": 0.7762107849121094 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5040269494056702, "epoch": 1.77, "learning_rate": 4.5698318775241855e-05, "loss": 0.615, "step": 2099, "task_loss": 0.8172342777252197 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.21802948415279388, "epoch": 1.77, "learning_rate": 4.569362261669954e-05, "loss": 0.3645, "step": 2100, "task_loss": 0.4495164155960083 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4093010425567627, "epoch": 1.78, "learning_rate": 4.568892645815723e-05, "loss": 0.4773, "step": 2101, "task_loss": 0.6140013337135315 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.7371014356613159, "epoch": 1.78, "learning_rate": 4.5684230299614914e-05, "loss": 0.5325, "step": 2102, "task_loss": 0.8337481021881104 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.38302743434906006, "epoch": 1.78, "learning_rate": 4.567953414107261e-05, "loss": 0.4941, "step": 2103, "task_loss": 0.5175051689147949 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.36614078283309937, "epoch": 1.78, "learning_rate": 4.5674837982530294e-05, "loss": 0.4696, "step": 2104, "task_loss": 0.8599974513053894 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.7975735068321228, "epoch": 1.78, "learning_rate": 4.567014182398798e-05, "loss": 0.6115, "step": 2105, "task_loss": 0.7631282806396484 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.39564090967178345, "epoch": 1.78, "learning_rate": 4.5665445665445666e-05, "loss": 0.4693, "step": 2106, "task_loss": 1.1201039552688599 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6424553394317627, "epoch": 1.78, "learning_rate": 4.566074950690335e-05, "loss": 0.7802, "step": 2107, "task_loss": 0.7485374212265015 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.7212977409362793, "epoch": 1.78, "learning_rate": 4.5656053348361046e-05, "loss": 0.548, "step": 2108, "task_loss": 0.9998968839645386 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6507986783981323, "epoch": 1.78, "learning_rate": 4.565135718981873e-05, "loss": 0.5358, "step": 2109, "task_loss": 1.282192587852478 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4429655373096466, "epoch": 1.78, "learning_rate": 4.564666103127642e-05, "loss": 0.5711, "step": 2110, "task_loss": 0.7206053137779236 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.39231905341148376, "epoch": 1.78, "learning_rate": 4.5641964872734105e-05, "loss": 0.5264, "step": 2111, "task_loss": 0.39057064056396484 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4905829429626465, "epoch": 1.78, "learning_rate": 4.563726871419179e-05, "loss": 0.6899, "step": 2112, "task_loss": 0.13349761068820953 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.49836465716362, "epoch": 1.79, "learning_rate": 4.5632572555649484e-05, "loss": 0.5453, "step": 2113, "task_loss": 0.1886162906885147 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5580978989601135, "epoch": 1.79, "learning_rate": 4.5627876397107164e-05, "loss": 0.5938, "step": 2114, "task_loss": 0.868122398853302 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6009644269943237, "epoch": 1.79, "learning_rate": 4.562318023856486e-05, "loss": 0.5606, "step": 2115, "task_loss": 1.0359578132629395 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4018334150314331, "epoch": 1.79, "learning_rate": 4.561848408002254e-05, "loss": 0.5318, "step": 2116, "task_loss": 0.8555917143821716 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.40596720576286316, "epoch": 1.79, "learning_rate": 4.5613787921480236e-05, "loss": 0.571, "step": 2117, "task_loss": 0.30545079708099365 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6815071702003479, "epoch": 1.79, "learning_rate": 4.5609091762937916e-05, "loss": 0.4764, "step": 2118, "task_loss": 0.6880651116371155 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.7121834754943848, "epoch": 1.79, "learning_rate": 4.56043956043956e-05, "loss": 0.5259, "step": 2119, "task_loss": 1.4275411367416382 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.33292773365974426, "epoch": 1.79, "learning_rate": 4.5599699445853295e-05, "loss": 0.7284, "step": 2120, "task_loss": 0.7352378964424133 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4035104513168335, "epoch": 1.79, "learning_rate": 4.559500328731098e-05, "loss": 0.5288, "step": 2121, "task_loss": 1.265568494796753 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.47141703963279724, "epoch": 1.79, "learning_rate": 4.559030712876867e-05, "loss": 0.5049, "step": 2122, "task_loss": 0.6432398557662964 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.7463590502738953, "epoch": 1.79, "learning_rate": 4.5585610970226354e-05, "loss": 0.7512, "step": 2123, "task_loss": 1.4116387367248535 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.571821928024292, "epoch": 1.79, "learning_rate": 4.558091481168405e-05, "loss": 0.5925, "step": 2124, "task_loss": 1.356939435005188 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6260712146759033, "epoch": 1.8, "learning_rate": 4.5576218653141734e-05, "loss": 0.6078, "step": 2125, "task_loss": 0.6571511030197144 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6212284564971924, "epoch": 1.8, "learning_rate": 4.557152249459942e-05, "loss": 0.5109, "step": 2126, "task_loss": 0.9573767185211182 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5255595445632935, "epoch": 1.8, "learning_rate": 4.5566826336057106e-05, "loss": 0.59, "step": 2127, "task_loss": 0.652902364730835 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4078429937362671, "epoch": 1.8, "learning_rate": 4.556213017751479e-05, "loss": 0.5608, "step": 2128, "task_loss": 0.7663223743438721 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6555610299110413, "epoch": 1.8, "learning_rate": 4.5557434018972486e-05, "loss": 0.5736, "step": 2129, "task_loss": 0.16959886252880096 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.25424739718437195, "epoch": 1.8, "learning_rate": 4.555273786043017e-05, "loss": 0.533, "step": 2130, "task_loss": 0.2955516576766968 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6188502311706543, "epoch": 1.8, "learning_rate": 4.554804170188786e-05, "loss": 0.499, "step": 2131, "task_loss": 0.5569431185722351 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5260052680969238, "epoch": 1.8, "learning_rate": 4.5543345543345545e-05, "loss": 0.4241, "step": 2132, "task_loss": 1.1661746501922607 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3231273591518402, "epoch": 1.8, "learning_rate": 4.553864938480323e-05, "loss": 0.3994, "step": 2133, "task_loss": 0.8574492931365967 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.8729653358459473, "epoch": 1.8, "learning_rate": 4.5533953226260924e-05, "loss": 0.629, "step": 2134, "task_loss": 0.5370767116546631 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5918306112289429, "epoch": 1.8, "learning_rate": 4.5529257067718604e-05, "loss": 0.5093, "step": 2135, "task_loss": 1.4300636053085327 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3197835385799408, "epoch": 1.81, "learning_rate": 4.55245609091763e-05, "loss": 0.4224, "step": 2136, "task_loss": 0.42014288902282715 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5788506269454956, "epoch": 1.81, "learning_rate": 4.5519864750633983e-05, "loss": 0.5234, "step": 2137, "task_loss": 1.5012747049331665 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5487416982650757, "epoch": 1.81, "learning_rate": 4.551516859209167e-05, "loss": 0.5452, "step": 2138, "task_loss": 1.2507144212722778 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2663614749908447, "epoch": 1.81, "learning_rate": 4.551047243354936e-05, "loss": 0.3141, "step": 2139, "task_loss": 0.38411852717399597 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6469610333442688, "epoch": 1.81, "learning_rate": 4.550577627500704e-05, "loss": 0.5484, "step": 2140, "task_loss": 0.7316147685050964 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.33591869473457336, "epoch": 1.81, "learning_rate": 4.5501080116464736e-05, "loss": 0.4701, "step": 2141, "task_loss": 0.15448403358459473 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.845852255821228, "epoch": 1.81, "learning_rate": 4.549638395792242e-05, "loss": 0.774, "step": 2142, "task_loss": 0.9374898076057434 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5409667491912842, "epoch": 1.81, "learning_rate": 4.549168779938011e-05, "loss": 0.6409, "step": 2143, "task_loss": 1.0946840047836304 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.7807337045669556, "epoch": 1.81, "learning_rate": 4.5486991640837795e-05, "loss": 0.5916, "step": 2144, "task_loss": 1.2808492183685303 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3663102686405182, "epoch": 1.81, "learning_rate": 4.548229548229548e-05, "loss": 0.507, "step": 2145, "task_loss": 0.5666319727897644 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6047992706298828, "epoch": 1.81, "learning_rate": 4.5477599323753174e-05, "loss": 0.4602, "step": 2146, "task_loss": 1.170518159866333 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4351532459259033, "epoch": 1.81, "learning_rate": 4.547290316521086e-05, "loss": 0.5223, "step": 2147, "task_loss": 0.5255849361419678 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.7757853865623474, "epoch": 1.82, "learning_rate": 4.546820700666855e-05, "loss": 0.5872, "step": 2148, "task_loss": 0.361608624458313 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6012972593307495, "epoch": 1.82, "learning_rate": 4.546351084812623e-05, "loss": 0.5811, "step": 2149, "task_loss": 0.5504059791564941 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.44565412402153015, "epoch": 1.82, "learning_rate": 4.5458814689583926e-05, "loss": 0.5359, "step": 2150, "task_loss": 0.2065805345773697 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.589625358581543, "epoch": 1.82, "learning_rate": 4.545411853104161e-05, "loss": 0.5279, "step": 2151, "task_loss": 0.6667364835739136 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4272102415561676, "epoch": 1.82, "learning_rate": 4.544942237249929e-05, "loss": 0.4566, "step": 2152, "task_loss": 0.796841025352478 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5954587459564209, "epoch": 1.82, "learning_rate": 4.5444726213956985e-05, "loss": 0.5288, "step": 2153, "task_loss": 0.7692040801048279 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.7058021426200867, "epoch": 1.82, "learning_rate": 4.544003005541467e-05, "loss": 0.625, "step": 2154, "task_loss": 1.066992163658142 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.42417773604393005, "epoch": 1.82, "learning_rate": 4.5435333896872365e-05, "loss": 0.4271, "step": 2155, "task_loss": 0.7044788002967834 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.8196185827255249, "epoch": 1.82, "learning_rate": 4.543063773833005e-05, "loss": 0.5676, "step": 2156, "task_loss": 1.4717459678649902 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.41547542810440063, "epoch": 1.82, "learning_rate": 4.542594157978774e-05, "loss": 0.5298, "step": 2157, "task_loss": 0.7178290486335754 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.49613094329833984, "epoch": 1.82, "learning_rate": 4.5421245421245424e-05, "loss": 0.4604, "step": 2158, "task_loss": 0.6860933303833008 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6807384490966797, "epoch": 1.82, "learning_rate": 4.541654926270311e-05, "loss": 0.6239, "step": 2159, "task_loss": 1.0852456092834473 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.582312822341919, "epoch": 1.83, "learning_rate": 4.54118531041608e-05, "loss": 0.6066, "step": 2160, "task_loss": 0.9413288831710815 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3024747967720032, "epoch": 1.83, "learning_rate": 4.540715694561848e-05, "loss": 0.5687, "step": 2161, "task_loss": 0.24552661180496216 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.7132899761199951, "epoch": 1.83, "learning_rate": 4.5402460787076176e-05, "loss": 0.6837, "step": 2162, "task_loss": 0.6264849305152893 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.27583473920822144, "epoch": 1.83, "learning_rate": 4.539776462853386e-05, "loss": 0.608, "step": 2163, "task_loss": 0.10471760481595993 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4249713718891144, "epoch": 1.83, "learning_rate": 4.539306846999155e-05, "loss": 0.4802, "step": 2164, "task_loss": 0.48982134461402893 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5520041584968567, "epoch": 1.83, "learning_rate": 4.5388372311449235e-05, "loss": 0.4561, "step": 2165, "task_loss": 0.7499051690101624 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.44273197650909424, "epoch": 1.83, "learning_rate": 4.538367615290692e-05, "loss": 0.3812, "step": 2166, "task_loss": 0.7038940191268921 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.41414177417755127, "epoch": 1.83, "learning_rate": 4.5378979994364614e-05, "loss": 0.5456, "step": 2167, "task_loss": 0.28725168108940125 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.7032431960105896, "epoch": 1.83, "learning_rate": 4.53742838358223e-05, "loss": 0.5515, "step": 2168, "task_loss": 0.826956570148468 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4881632328033447, "epoch": 1.83, "learning_rate": 4.536958767727999e-05, "loss": 0.6441, "step": 2169, "task_loss": 0.23439858853816986 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5184561014175415, "epoch": 1.83, "learning_rate": 4.536489151873767e-05, "loss": 0.4243, "step": 2170, "task_loss": 0.6797229051589966 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.7807387113571167, "epoch": 1.83, "learning_rate": 4.536019536019536e-05, "loss": 0.6407, "step": 2171, "task_loss": 1.397269368171692 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.620652973651886, "epoch": 1.84, "learning_rate": 4.535549920165305e-05, "loss": 0.5552, "step": 2172, "task_loss": 1.65139639377594 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.829352617263794, "epoch": 1.84, "learning_rate": 4.535080304311074e-05, "loss": 0.5462, "step": 2173, "task_loss": 0.4907941520214081 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.637789249420166, "epoch": 1.84, "learning_rate": 4.5346106884568425e-05, "loss": 0.572, "step": 2174, "task_loss": 0.952836275100708 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.25070053339004517, "epoch": 1.84, "learning_rate": 4.534141072602611e-05, "loss": 0.4687, "step": 2175, "task_loss": 0.17430594563484192 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.41071581840515137, "epoch": 1.84, "learning_rate": 4.53367145674838e-05, "loss": 0.5044, "step": 2176, "task_loss": 1.1735378503799438 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3028821349143982, "epoch": 1.84, "learning_rate": 4.533201840894149e-05, "loss": 0.4292, "step": 2177, "task_loss": 0.20326544344425201 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4037761092185974, "epoch": 1.84, "learning_rate": 4.532732225039917e-05, "loss": 0.4835, "step": 2178, "task_loss": 0.7271296977996826 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.47745582461357117, "epoch": 1.84, "learning_rate": 4.5322626091856864e-05, "loss": 0.497, "step": 2179, "task_loss": 0.9477565288543701 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3540424704551697, "epoch": 1.84, "learning_rate": 4.531792993331455e-05, "loss": 0.4406, "step": 2180, "task_loss": 0.9853403568267822 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6484129428863525, "epoch": 1.84, "learning_rate": 4.531323377477224e-05, "loss": 0.568, "step": 2181, "task_loss": 1.2559936046600342 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5226573944091797, "epoch": 1.84, "learning_rate": 4.530853761622992e-05, "loss": 0.4311, "step": 2182, "task_loss": 0.2541341781616211 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3125724792480469, "epoch": 1.84, "learning_rate": 4.530384145768761e-05, "loss": 0.474, "step": 2183, "task_loss": 1.44239342212677 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.578337550163269, "epoch": 1.85, "learning_rate": 4.52991452991453e-05, "loss": 0.7536, "step": 2184, "task_loss": 1.3004363775253296 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6428928375244141, "epoch": 1.85, "learning_rate": 4.529444914060299e-05, "loss": 0.5545, "step": 2185, "task_loss": 0.6410274505615234 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.655086100101471, "epoch": 1.85, "learning_rate": 4.528975298206068e-05, "loss": 0.668, "step": 2186, "task_loss": 1.9022458791732788 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.7058622241020203, "epoch": 1.85, "learning_rate": 4.528505682351836e-05, "loss": 0.3764, "step": 2187, "task_loss": 0.5981324315071106 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5062519311904907, "epoch": 1.85, "learning_rate": 4.5280360664976054e-05, "loss": 0.5433, "step": 2188, "task_loss": 0.43108922243118286 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2743852138519287, "epoch": 1.85, "learning_rate": 4.527566450643374e-05, "loss": 0.2869, "step": 2189, "task_loss": 0.3801107406616211 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3619213104248047, "epoch": 1.85, "learning_rate": 4.527096834789143e-05, "loss": 0.4097, "step": 2190, "task_loss": 0.5799493789672852 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5339503288269043, "epoch": 1.85, "learning_rate": 4.5266272189349114e-05, "loss": 0.5173, "step": 2191, "task_loss": 0.615601122379303 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.35835206508636475, "epoch": 1.85, "learning_rate": 4.52615760308068e-05, "loss": 0.4037, "step": 2192, "task_loss": 0.32787516713142395 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.41713377833366394, "epoch": 1.85, "learning_rate": 4.525687987226449e-05, "loss": 0.5448, "step": 2193, "task_loss": 0.554107129573822 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5735433101654053, "epoch": 1.85, "learning_rate": 4.525218371372218e-05, "loss": 0.4704, "step": 2194, "task_loss": 0.6607270240783691 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6074005365371704, "epoch": 1.85, "learning_rate": 4.5247487555179866e-05, "loss": 0.4724, "step": 2195, "task_loss": 0.5390154719352722 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.30267736315727234, "epoch": 1.86, "learning_rate": 4.524279139663755e-05, "loss": 0.5718, "step": 2196, "task_loss": 0.3005446195602417 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3553377687931061, "epoch": 1.86, "learning_rate": 4.523809523809524e-05, "loss": 0.5129, "step": 2197, "task_loss": 0.5187085270881653 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.294472336769104, "epoch": 1.86, "learning_rate": 4.523339907955293e-05, "loss": 0.5451, "step": 2198, "task_loss": 0.4742613136768341 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.563508927822113, "epoch": 1.86, "learning_rate": 4.522870292101061e-05, "loss": 0.5543, "step": 2199, "task_loss": 0.7037122845649719 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.33230429887771606, "epoch": 1.86, "learning_rate": 4.5224006762468304e-05, "loss": 0.6198, "step": 2200, "task_loss": 0.6052303314208984 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5442047119140625, "epoch": 1.86, "learning_rate": 4.521931060392599e-05, "loss": 0.4875, "step": 2201, "task_loss": 0.5520533919334412 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.43952640891075134, "epoch": 1.86, "learning_rate": 4.521461444538368e-05, "loss": 0.4513, "step": 2202, "task_loss": 0.3257262706756592 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3801150918006897, "epoch": 1.86, "learning_rate": 4.520991828684137e-05, "loss": 0.4079, "step": 2203, "task_loss": 0.6574422121047974 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.42563432455062866, "epoch": 1.86, "learning_rate": 4.520522212829905e-05, "loss": 0.4891, "step": 2204, "task_loss": 0.7676490545272827 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.49109360575675964, "epoch": 1.86, "learning_rate": 4.520052596975674e-05, "loss": 0.428, "step": 2205, "task_loss": 0.5695843696594238 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3752708435058594, "epoch": 1.86, "learning_rate": 4.519582981121443e-05, "loss": 0.5115, "step": 2206, "task_loss": 0.4675496518611908 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.48420971632003784, "epoch": 1.87, "learning_rate": 4.5191133652672115e-05, "loss": 0.5828, "step": 2207, "task_loss": 0.9840088486671448 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4950655698776245, "epoch": 1.87, "learning_rate": 4.51864374941298e-05, "loss": 0.5466, "step": 2208, "task_loss": 0.5158939957618713 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6614522933959961, "epoch": 1.87, "learning_rate": 4.518174133558749e-05, "loss": 0.476, "step": 2209, "task_loss": 0.9019045233726501 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5384057760238647, "epoch": 1.87, "learning_rate": 4.517704517704518e-05, "loss": 0.4236, "step": 2210, "task_loss": 0.8246455788612366 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.39109566807746887, "epoch": 1.87, "learning_rate": 4.517234901850287e-05, "loss": 0.3618, "step": 2211, "task_loss": 0.6517609357833862 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.47159406542778015, "epoch": 1.87, "learning_rate": 4.5167652859960554e-05, "loss": 0.4765, "step": 2212, "task_loss": 1.1871362924575806 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4874321222305298, "epoch": 1.87, "learning_rate": 4.516295670141824e-05, "loss": 0.5, "step": 2213, "task_loss": 0.7124010920524597 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5585620403289795, "epoch": 1.87, "learning_rate": 4.5158260542875926e-05, "loss": 0.5257, "step": 2214, "task_loss": 0.7095432877540588 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.9153774976730347, "epoch": 1.87, "learning_rate": 4.515356438433362e-05, "loss": 0.6041, "step": 2215, "task_loss": 0.7734924554824829 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.7772361636161804, "epoch": 1.87, "learning_rate": 4.5148868225791306e-05, "loss": 0.4374, "step": 2216, "task_loss": 0.6268754005432129 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.49831143021583557, "epoch": 1.87, "learning_rate": 4.514417206724899e-05, "loss": 0.4814, "step": 2217, "task_loss": 0.7295777797698975 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4912940263748169, "epoch": 1.87, "learning_rate": 4.513947590870668e-05, "loss": 0.5651, "step": 2218, "task_loss": 1.9122928380966187 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4721025228500366, "epoch": 1.88, "learning_rate": 4.513477975016437e-05, "loss": 0.4508, "step": 2219, "task_loss": 0.4070320725440979 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.24298636615276337, "epoch": 1.88, "learning_rate": 4.513008359162206e-05, "loss": 0.5555, "step": 2220, "task_loss": 0.35527098178863525 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2279401570558548, "epoch": 1.88, "learning_rate": 4.5125387433079744e-05, "loss": 0.5776, "step": 2221, "task_loss": 0.2882367968559265 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.407845139503479, "epoch": 1.88, "learning_rate": 4.512069127453743e-05, "loss": 0.4195, "step": 2222, "task_loss": 0.43978166580200195 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5752991437911987, "epoch": 1.88, "learning_rate": 4.511599511599512e-05, "loss": 0.4666, "step": 2223, "task_loss": 1.2600221633911133 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.413982093334198, "epoch": 1.88, "learning_rate": 4.511129895745281e-05, "loss": 0.4144, "step": 2224, "task_loss": 0.7930886745452881 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.34477707743644714, "epoch": 1.88, "learning_rate": 4.510660279891049e-05, "loss": 0.5246, "step": 2225, "task_loss": 0.8348634243011475 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.23894600570201874, "epoch": 1.88, "learning_rate": 4.510190664036818e-05, "loss": 0.4209, "step": 2226, "task_loss": 0.11770555377006531 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3018686771392822, "epoch": 1.88, "learning_rate": 4.509721048182587e-05, "loss": 0.5331, "step": 2227, "task_loss": 0.4910077452659607 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5459016561508179, "epoch": 1.88, "learning_rate": 4.5092514323283556e-05, "loss": 0.4951, "step": 2228, "task_loss": 0.8900324702262878 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6447457075119019, "epoch": 1.88, "learning_rate": 4.508781816474124e-05, "loss": 0.4969, "step": 2229, "task_loss": 0.48972901701927185 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6700356006622314, "epoch": 1.88, "learning_rate": 4.508312200619893e-05, "loss": 0.6886, "step": 2230, "task_loss": 1.27056884765625 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4674208164215088, "epoch": 1.89, "learning_rate": 4.507842584765662e-05, "loss": 0.6246, "step": 2231, "task_loss": 0.8575214743614197 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3030596673488617, "epoch": 1.89, "learning_rate": 4.507372968911431e-05, "loss": 0.4507, "step": 2232, "task_loss": 0.27985879778862 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5640263557434082, "epoch": 1.89, "learning_rate": 4.5069033530571994e-05, "loss": 0.5935, "step": 2233, "task_loss": 0.8245450854301453 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6157797574996948, "epoch": 1.89, "learning_rate": 4.506433737202968e-05, "loss": 0.512, "step": 2234, "task_loss": 0.3441934287548065 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.7360492944717407, "epoch": 1.89, "learning_rate": 4.505964121348737e-05, "loss": 0.5686, "step": 2235, "task_loss": 0.9245166182518005 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5701268315315247, "epoch": 1.89, "learning_rate": 4.505494505494506e-05, "loss": 0.3779, "step": 2236, "task_loss": 0.38714271783828735 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.7124241590499878, "epoch": 1.89, "learning_rate": 4.5050248896402746e-05, "loss": 0.625, "step": 2237, "task_loss": 0.9268093705177307 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5160531401634216, "epoch": 1.89, "learning_rate": 4.504555273786043e-05, "loss": 0.4563, "step": 2238, "task_loss": 0.9240202903747559 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5146157145500183, "epoch": 1.89, "learning_rate": 4.504085657931812e-05, "loss": 0.4871, "step": 2239, "task_loss": 0.4551108479499817 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.39063483476638794, "epoch": 1.89, "learning_rate": 4.5036160420775805e-05, "loss": 0.488, "step": 2240, "task_loss": 0.5883594155311584 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.31814855337142944, "epoch": 1.89, "learning_rate": 4.50314642622335e-05, "loss": 0.4786, "step": 2241, "task_loss": 0.3789138197898865 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.36893779039382935, "epoch": 1.89, "learning_rate": 4.502676810369118e-05, "loss": 0.578, "step": 2242, "task_loss": 1.401842474937439 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.45042210817337036, "epoch": 1.9, "learning_rate": 4.502207194514887e-05, "loss": 0.4621, "step": 2243, "task_loss": 0.882178008556366 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5337485074996948, "epoch": 1.9, "learning_rate": 4.501737578660656e-05, "loss": 0.5552, "step": 2244, "task_loss": 0.5124814510345459 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.44250887632369995, "epoch": 1.9, "learning_rate": 4.501267962806425e-05, "loss": 0.65, "step": 2245, "task_loss": 0.5449134111404419 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5950537919998169, "epoch": 1.9, "learning_rate": 4.500798346952193e-05, "loss": 0.5676, "step": 2246, "task_loss": 0.7503431439399719 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.7407461404800415, "epoch": 1.9, "learning_rate": 4.5003287310979616e-05, "loss": 0.582, "step": 2247, "task_loss": 0.6328091025352478 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6619239449501038, "epoch": 1.9, "learning_rate": 4.499859115243731e-05, "loss": 0.5603, "step": 2248, "task_loss": 1.067496418952942 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5679786205291748, "epoch": 1.9, "learning_rate": 4.4993894993894996e-05, "loss": 0.5076, "step": 2249, "task_loss": 0.5655072927474976 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6375457644462585, "epoch": 1.9, "learning_rate": 4.498919883535269e-05, "loss": 0.5349, "step": 2250, "task_loss": 1.417980432510376 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.48016980290412903, "epoch": 1.9, "learning_rate": 4.498450267681037e-05, "loss": 0.4689, "step": 2251, "task_loss": 0.8580188751220703 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2589854896068573, "epoch": 1.9, "learning_rate": 4.497980651826806e-05, "loss": 0.4222, "step": 2252, "task_loss": 0.2614496052265167 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5103530287742615, "epoch": 1.9, "learning_rate": 4.497511035972575e-05, "loss": 0.4324, "step": 2253, "task_loss": 0.6431676745414734 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6913448572158813, "epoch": 1.9, "learning_rate": 4.4970414201183434e-05, "loss": 0.4315, "step": 2254, "task_loss": 0.26369866728782654 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.45819130539894104, "epoch": 1.91, "learning_rate": 4.496571804264112e-05, "loss": 0.3463, "step": 2255, "task_loss": 1.0589169263839722 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.353249192237854, "epoch": 1.91, "learning_rate": 4.496102188409881e-05, "loss": 0.4943, "step": 2256, "task_loss": 0.5804387331008911 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.25524723529815674, "epoch": 1.91, "learning_rate": 4.49563257255565e-05, "loss": 0.5528, "step": 2257, "task_loss": 0.035556524991989136 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.37007302045822144, "epoch": 1.91, "learning_rate": 4.4951629567014186e-05, "loss": 0.2923, "step": 2258, "task_loss": 0.0676429346203804 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3159511089324951, "epoch": 1.91, "learning_rate": 4.494693340847187e-05, "loss": 0.5108, "step": 2259, "task_loss": 0.8213542699813843 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4573473632335663, "epoch": 1.91, "learning_rate": 4.494223724992956e-05, "loss": 0.4731, "step": 2260, "task_loss": 1.334798812866211 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5336796641349792, "epoch": 1.91, "learning_rate": 4.4937541091387245e-05, "loss": 0.5235, "step": 2261, "task_loss": 0.8721821308135986 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.37542665004730225, "epoch": 1.91, "learning_rate": 4.493284493284494e-05, "loss": 0.4349, "step": 2262, "task_loss": 0.6844853162765503 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.41831448674201965, "epoch": 1.91, "learning_rate": 4.4928148774302625e-05, "loss": 0.3701, "step": 2263, "task_loss": 0.19918948411941528 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.7168883085250854, "epoch": 1.91, "learning_rate": 4.492345261576031e-05, "loss": 0.5545, "step": 2264, "task_loss": 0.4741404056549072 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.36223727464675903, "epoch": 1.91, "learning_rate": 4.4918756457218e-05, "loss": 0.4288, "step": 2265, "task_loss": 0.4317978620529175 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.8133450746536255, "epoch": 1.91, "learning_rate": 4.4914060298675684e-05, "loss": 0.641, "step": 2266, "task_loss": 1.952117681503296 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4052419364452362, "epoch": 1.92, "learning_rate": 4.490936414013338e-05, "loss": 0.4599, "step": 2267, "task_loss": 0.324542760848999 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.38399559259414673, "epoch": 1.92, "learning_rate": 4.4904667981591057e-05, "loss": 0.4148, "step": 2268, "task_loss": 0.5394772887229919 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3089216649532318, "epoch": 1.92, "learning_rate": 4.489997182304875e-05, "loss": 0.4466, "step": 2269, "task_loss": 0.5239662528038025 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5696742534637451, "epoch": 1.92, "learning_rate": 4.4895275664506436e-05, "loss": 0.491, "step": 2270, "task_loss": 1.0313329696655273 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.35590529441833496, "epoch": 1.92, "learning_rate": 4.489057950596412e-05, "loss": 0.4242, "step": 2271, "task_loss": 0.49920719861984253 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.45868146419525146, "epoch": 1.92, "learning_rate": 4.488588334742181e-05, "loss": 0.4737, "step": 2272, "task_loss": 0.5445338487625122 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.41158390045166016, "epoch": 1.92, "learning_rate": 4.4881187188879495e-05, "loss": 0.5087, "step": 2273, "task_loss": 0.08774694055318832 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.46242597699165344, "epoch": 1.92, "learning_rate": 4.487649103033719e-05, "loss": 0.5241, "step": 2274, "task_loss": 0.5694864988327026 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5519911646842957, "epoch": 1.92, "learning_rate": 4.4871794871794874e-05, "loss": 0.4687, "step": 2275, "task_loss": 0.5505693554878235 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3028916120529175, "epoch": 1.92, "learning_rate": 4.486709871325256e-05, "loss": 0.4878, "step": 2276, "task_loss": 0.4008864164352417 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6774001121520996, "epoch": 1.92, "learning_rate": 4.486240255471025e-05, "loss": 0.6657, "step": 2277, "task_loss": 0.26391151547431946 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5171905159950256, "epoch": 1.93, "learning_rate": 4.4857706396167933e-05, "loss": 0.5793, "step": 2278, "task_loss": 0.7273787260055542 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5339009165763855, "epoch": 1.93, "learning_rate": 4.4853010237625627e-05, "loss": 0.5387, "step": 2279, "task_loss": 0.9003524780273438 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.553004801273346, "epoch": 1.93, "learning_rate": 4.484831407908331e-05, "loss": 0.5062, "step": 2280, "task_loss": 0.8329215049743652 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5915158987045288, "epoch": 1.93, "learning_rate": 4.4843617920541e-05, "loss": 0.5807, "step": 2281, "task_loss": 0.8880163431167603 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.34157514572143555, "epoch": 1.93, "learning_rate": 4.4838921761998686e-05, "loss": 0.4766, "step": 2282, "task_loss": 0.7678619027137756 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.547350287437439, "epoch": 1.93, "learning_rate": 4.483422560345638e-05, "loss": 0.5114, "step": 2283, "task_loss": 0.6258320212364197 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3251512944698334, "epoch": 1.93, "learning_rate": 4.4829529444914065e-05, "loss": 0.4494, "step": 2284, "task_loss": 0.4617779552936554 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.42849624156951904, "epoch": 1.93, "learning_rate": 4.4824833286371745e-05, "loss": 0.6358, "step": 2285, "task_loss": 0.7881009578704834 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4692254960536957, "epoch": 1.93, "learning_rate": 4.482013712782944e-05, "loss": 0.4964, "step": 2286, "task_loss": 1.0737584829330444 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.744013786315918, "epoch": 1.93, "learning_rate": 4.4815440969287124e-05, "loss": 0.6587, "step": 2287, "task_loss": 0.9637811183929443 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.49526044726371765, "epoch": 1.93, "learning_rate": 4.481074481074482e-05, "loss": 0.5807, "step": 2288, "task_loss": 1.0202584266662598 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4795507788658142, "epoch": 1.93, "learning_rate": 4.48060486522025e-05, "loss": 0.456, "step": 2289, "task_loss": 0.9396290183067322 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2889283001422882, "epoch": 1.94, "learning_rate": 4.480135249366019e-05, "loss": 0.4967, "step": 2290, "task_loss": 0.5020221471786499 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.7438539266586304, "epoch": 1.94, "learning_rate": 4.4796656335117876e-05, "loss": 0.5163, "step": 2291, "task_loss": 2.0630650520324707 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4943634867668152, "epoch": 1.94, "learning_rate": 4.479196017657556e-05, "loss": 0.517, "step": 2292, "task_loss": 1.1164608001708984 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4785299301147461, "epoch": 1.94, "learning_rate": 4.4787264018033256e-05, "loss": 0.5257, "step": 2293, "task_loss": 0.6141465902328491 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3097531795501709, "epoch": 1.94, "learning_rate": 4.4782567859490935e-05, "loss": 0.592, "step": 2294, "task_loss": 0.12399924546480179 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5815852880477905, "epoch": 1.94, "learning_rate": 4.477787170094863e-05, "loss": 0.4104, "step": 2295, "task_loss": 0.48492249846458435 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5815432071685791, "epoch": 1.94, "learning_rate": 4.4773175542406315e-05, "loss": 0.5207, "step": 2296, "task_loss": 1.0139658451080322 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4569403827190399, "epoch": 1.94, "learning_rate": 4.4768479383864e-05, "loss": 0.5394, "step": 2297, "task_loss": 0.8580226898193359 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4122565984725952, "epoch": 1.94, "learning_rate": 4.476378322532169e-05, "loss": 0.4653, "step": 2298, "task_loss": 0.3986000120639801 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.509259819984436, "epoch": 1.94, "learning_rate": 4.4759087066779374e-05, "loss": 0.442, "step": 2299, "task_loss": 0.9061173796653748 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.42090505361557007, "epoch": 1.94, "learning_rate": 4.475439090823707e-05, "loss": 0.4628, "step": 2300, "task_loss": 1.6994291543960571 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.19289448857307434, "epoch": 1.94, "learning_rate": 4.474969474969475e-05, "loss": 0.4436, "step": 2301, "task_loss": 0.520093560218811 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4882197380065918, "epoch": 1.95, "learning_rate": 4.474499859115244e-05, "loss": 0.6214, "step": 2302, "task_loss": 0.43309223651885986 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.33125039935112, "epoch": 1.95, "learning_rate": 4.4740302432610126e-05, "loss": 0.4446, "step": 2303, "task_loss": 1.5631426572799683 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.46719789505004883, "epoch": 1.95, "learning_rate": 4.473560627406781e-05, "loss": 0.4353, "step": 2304, "task_loss": 0.558542013168335 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.7402175664901733, "epoch": 1.95, "learning_rate": 4.4730910115525505e-05, "loss": 0.5682, "step": 2305, "task_loss": 0.7632941603660583 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4942198395729065, "epoch": 1.95, "learning_rate": 4.4726213956983185e-05, "loss": 0.4506, "step": 2306, "task_loss": 1.0510542392730713 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5052782297134399, "epoch": 1.95, "learning_rate": 4.472151779844088e-05, "loss": 0.398, "step": 2307, "task_loss": 0.4544996917247772 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6877444982528687, "epoch": 1.95, "learning_rate": 4.4716821639898564e-05, "loss": 0.5308, "step": 2308, "task_loss": 0.6220576763153076 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.40043097734451294, "epoch": 1.95, "learning_rate": 4.471212548135625e-05, "loss": 0.5133, "step": 2309, "task_loss": 0.7104654908180237 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6454329490661621, "epoch": 1.95, "learning_rate": 4.4707429322813944e-05, "loss": 0.5804, "step": 2310, "task_loss": 0.6819896101951599 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4672989249229431, "epoch": 1.95, "learning_rate": 4.470273316427162e-05, "loss": 0.4208, "step": 2311, "task_loss": 0.5673284530639648 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.880226731300354, "epoch": 1.95, "learning_rate": 4.4698037005729316e-05, "loss": 0.5783, "step": 2312, "task_loss": 0.895263135433197 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5255374312400818, "epoch": 1.95, "learning_rate": 4.4693340847187e-05, "loss": 0.578, "step": 2313, "task_loss": 1.2168922424316406 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.458711177110672, "epoch": 1.96, "learning_rate": 4.4688644688644696e-05, "loss": 0.5142, "step": 2314, "task_loss": 1.1933882236480713 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.341196209192276, "epoch": 1.96, "learning_rate": 4.4683948530102375e-05, "loss": 0.5817, "step": 2315, "task_loss": 0.5835264325141907 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6838213205337524, "epoch": 1.96, "learning_rate": 4.467925237156007e-05, "loss": 0.5073, "step": 2316, "task_loss": 1.0636767148971558 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5800464153289795, "epoch": 1.96, "learning_rate": 4.4674556213017755e-05, "loss": 0.5445, "step": 2317, "task_loss": 0.6344751715660095 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.37789446115493774, "epoch": 1.96, "learning_rate": 4.466986005447544e-05, "loss": 0.4076, "step": 2318, "task_loss": 0.42768341302871704 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5368605256080627, "epoch": 1.96, "learning_rate": 4.466516389593313e-05, "loss": 0.3775, "step": 2319, "task_loss": 0.4113626182079315 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4308410882949829, "epoch": 1.96, "learning_rate": 4.4660467737390814e-05, "loss": 0.4753, "step": 2320, "task_loss": 0.5586192607879639 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3236071765422821, "epoch": 1.96, "learning_rate": 4.465577157884851e-05, "loss": 0.3887, "step": 2321, "task_loss": 0.4282023310661316 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5250446796417236, "epoch": 1.96, "learning_rate": 4.4651075420306193e-05, "loss": 0.4784, "step": 2322, "task_loss": 0.4545106887817383 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4591291844844818, "epoch": 1.96, "learning_rate": 4.464637926176388e-05, "loss": 0.4527, "step": 2323, "task_loss": 0.3421345353126526 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.49141281843185425, "epoch": 1.96, "learning_rate": 4.4641683103221566e-05, "loss": 0.5293, "step": 2324, "task_loss": 0.3139999806880951 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3978886008262634, "epoch": 1.96, "learning_rate": 4.463698694467925e-05, "loss": 0.5748, "step": 2325, "task_loss": 0.779022753238678 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3961014747619629, "epoch": 1.97, "learning_rate": 4.4632290786136946e-05, "loss": 0.5207, "step": 2326, "task_loss": 0.9287274479866028 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.31038859486579895, "epoch": 1.97, "learning_rate": 4.462759462759463e-05, "loss": 0.3922, "step": 2327, "task_loss": 0.29638344049453735 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.502657413482666, "epoch": 1.97, "learning_rate": 4.462289846905232e-05, "loss": 0.5031, "step": 2328, "task_loss": 0.51375812292099 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3382720649242401, "epoch": 1.97, "learning_rate": 4.4618202310510005e-05, "loss": 0.484, "step": 2329, "task_loss": 0.39023569226264954 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.469430148601532, "epoch": 1.97, "learning_rate": 4.461350615196769e-05, "loss": 0.4501, "step": 2330, "task_loss": 0.8305456638336182 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5457801222801208, "epoch": 1.97, "learning_rate": 4.4608809993425384e-05, "loss": 0.5484, "step": 2331, "task_loss": 0.3412542939186096 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.38987088203430176, "epoch": 1.97, "learning_rate": 4.4604113834883064e-05, "loss": 0.409, "step": 2332, "task_loss": 0.08229457587003708 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.35831141471862793, "epoch": 1.97, "learning_rate": 4.459941767634076e-05, "loss": 0.4106, "step": 2333, "task_loss": 0.649703323841095 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.48139315843582153, "epoch": 1.97, "learning_rate": 4.459472151779844e-05, "loss": 0.5709, "step": 2334, "task_loss": 0.9706647992134094 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.7979201078414917, "epoch": 1.97, "learning_rate": 4.459002535925613e-05, "loss": 0.6436, "step": 2335, "task_loss": 1.4212064743041992 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4594099223613739, "epoch": 1.97, "learning_rate": 4.4585329200713816e-05, "loss": 0.6134, "step": 2336, "task_loss": 1.680701732635498 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3693271279335022, "epoch": 1.97, "learning_rate": 4.45806330421715e-05, "loss": 0.4698, "step": 2337, "task_loss": 1.2624528408050537 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5230467319488525, "epoch": 1.98, "learning_rate": 4.4575936883629195e-05, "loss": 0.4721, "step": 2338, "task_loss": 0.8766818046569824 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.46258798241615295, "epoch": 1.98, "learning_rate": 4.457124072508688e-05, "loss": 0.5232, "step": 2339, "task_loss": 0.11453549563884735 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5493994355201721, "epoch": 1.98, "learning_rate": 4.4566544566544575e-05, "loss": 0.4927, "step": 2340, "task_loss": 0.1464962512254715 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.49796533584594727, "epoch": 1.98, "learning_rate": 4.4561848408002254e-05, "loss": 0.5328, "step": 2341, "task_loss": 0.43612194061279297 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2752041518688202, "epoch": 1.98, "learning_rate": 4.455715224945994e-05, "loss": 0.3989, "step": 2342, "task_loss": 0.4040909707546234 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3940029740333557, "epoch": 1.98, "learning_rate": 4.4552456090917634e-05, "loss": 0.3771, "step": 2343, "task_loss": 0.8706028461456299 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.36160773038864136, "epoch": 1.98, "learning_rate": 4.454775993237532e-05, "loss": 0.4592, "step": 2344, "task_loss": 0.7673732042312622 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5324007272720337, "epoch": 1.98, "learning_rate": 4.4543063773833006e-05, "loss": 0.6002, "step": 2345, "task_loss": 1.7328389883041382 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.22970181703567505, "epoch": 1.98, "learning_rate": 4.453836761529069e-05, "loss": 0.428, "step": 2346, "task_loss": 0.2570614218711853 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.38340407609939575, "epoch": 1.98, "learning_rate": 4.4533671456748386e-05, "loss": 0.5139, "step": 2347, "task_loss": 0.4300479590892792 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6223381161689758, "epoch": 1.98, "learning_rate": 4.452897529820607e-05, "loss": 0.6749, "step": 2348, "task_loss": 1.5073533058166504 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2021184265613556, "epoch": 1.99, "learning_rate": 4.452427913966375e-05, "loss": 0.5348, "step": 2349, "task_loss": 0.11629484593868256 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5701926350593567, "epoch": 1.99, "learning_rate": 4.4519582981121445e-05, "loss": 0.5556, "step": 2350, "task_loss": 0.7032594084739685 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5005585551261902, "epoch": 1.99, "learning_rate": 4.451488682257913e-05, "loss": 0.4626, "step": 2351, "task_loss": 0.42340579628944397 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6794813275337219, "epoch": 1.99, "learning_rate": 4.4510190664036824e-05, "loss": 0.4195, "step": 2352, "task_loss": 0.3115620017051697 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.629858136177063, "epoch": 1.99, "learning_rate": 4.4505494505494504e-05, "loss": 0.6159, "step": 2353, "task_loss": 1.1281862258911133 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3484354317188263, "epoch": 1.99, "learning_rate": 4.45007983469522e-05, "loss": 0.4341, "step": 2354, "task_loss": 0.15180468559265137 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6878166198730469, "epoch": 1.99, "learning_rate": 4.449610218840988e-05, "loss": 0.4281, "step": 2355, "task_loss": 0.6516636610031128 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.50490802526474, "epoch": 1.99, "learning_rate": 4.449140602986757e-05, "loss": 0.5107, "step": 2356, "task_loss": 0.8618326187133789 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2663092017173767, "epoch": 1.99, "learning_rate": 4.448670987132526e-05, "loss": 0.4017, "step": 2357, "task_loss": 0.24696709215641022 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3918919265270233, "epoch": 1.99, "learning_rate": 4.448201371278294e-05, "loss": 0.6027, "step": 2358, "task_loss": 0.18858911097049713 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6453561186790466, "epoch": 1.99, "learning_rate": 4.4477317554240635e-05, "loss": 0.6237, "step": 2359, "task_loss": 0.9329038262367249 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2780349850654602, "epoch": 1.99, "learning_rate": 4.447262139569832e-05, "loss": 0.3615, "step": 2360, "task_loss": 0.602092444896698 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2617350220680237, "epoch": 2.0, "learning_rate": 4.446792523715601e-05, "loss": 0.4915, "step": 2361, "task_loss": 0.5249382257461548 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4487283229827881, "epoch": 2.0, "learning_rate": 4.4463229078613694e-05, "loss": 0.568, "step": 2362, "task_loss": 0.1316448599100113 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3856232166290283, "epoch": 2.0, "learning_rate": 4.445853292007138e-05, "loss": 0.4988, "step": 2363, "task_loss": 1.4342809915542603 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2757651209831238, "epoch": 2.0, "learning_rate": 4.4453836761529074e-05, "loss": 0.625, "step": 2364, "task_loss": 1.5079602003097534 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.7427771687507629, "epoch": 2.0, "learning_rate": 4.444914060298676e-05, "loss": 0.5224, "step": 2365, "task_loss": 1.1306349039077759 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6972264051437378, "epoch": 2.0, "learning_rate": 4.4444444444444447e-05, "loss": 0.6178, "step": 2366, "task_loss": 1.0945106744766235 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -0.007003734819591045, "compression/movement_sparsity/linear_layer_sparsity": 0.0010031444507022475, "compression/movement_sparsity/model_sparsity": 0.0009686833527873334, "compression_loss": 0.0, "distillation_loss": 0.5802435874938965, "epoch": 2.0, "learning_rate": 4.443974828590213e-05, "loss": 0.9462, "step": 2367, "task_loss": 1.607359528541565 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0008443568368990961, "compression/movement_sparsity/importance_threshold": -0.006997821168212295, "compression/movement_sparsity/linear_layer_sparsity": 0.0010031444507022475, "compression/movement_sparsity/model_sparsity": 0.0009686833527873334, "compression_loss": 0.09122283011674881, "distillation_loss": 0.6684330105781555, "epoch": 2.0, "learning_rate": 4.443505212735982e-05, "loss": 0.6762, "step": 2368, "task_loss": 0.776258111000061 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0016882382476638425, "compression/movement_sparsity/importance_threshold": -0.006991910846592116, "compression/movement_sparsity/linear_layer_sparsity": 0.0010031563748698835, "compression/movement_sparsity/model_sparsity": 0.000968694867323128, "compression_loss": 0.18239431083202362, "distillation_loss": 0.5181126594543457, "epoch": 2.0, "learning_rate": 4.443035596881751e-05, "loss": 0.641, "step": 2369, "task_loss": 0.5361073613166809 }, { "compression/movement_sparsity/importance_regularization_factor": 0.002531644366179364, "compression/movement_sparsity/importance_threshold": -0.0069860038537928135, "compression/movement_sparsity/linear_layer_sparsity": 0.0010092615486994453, "compression/movement_sparsity/model_sparsity": 0.0009745903096499329, "compression_loss": 0.2735142409801483, "distillation_loss": 0.7323172092437744, "epoch": 2.0, "learning_rate": 4.44256598102752e-05, "loss": 0.9388, "step": 2370, "task_loss": 0.7403607964515686 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0033745753263308975, "compression/movement_sparsity/importance_threshold": -0.006980100188876689, "compression/movement_sparsity/linear_layer_sparsity": 0.0010092496245318096, "compression/movement_sparsity/model_sparsity": 0.0009745787951141384, "compression_loss": 0.36458295583724976, "distillation_loss": 0.4879717528820038, "epoch": 2.0, "learning_rate": 4.4420963651732885e-05, "loss": 0.8105, "step": 2371, "task_loss": 0.7959870100021362 }, { "compression/movement_sparsity/importance_regularization_factor": 0.004217031262003457, "compression/movement_sparsity/importance_threshold": -0.006974199850906047, "compression/movement_sparsity/linear_layer_sparsity": 0.0010000918637874667, "compression/movement_sparsity/model_sparsity": 0.000965735631623931, "compression_loss": 0.4555998742580414, "distillation_loss": 0.3729327917098999, "epoch": 2.01, "learning_rate": 4.441626749319057e-05, "loss": 1.0359, "step": 2372, "task_loss": 1.5156570672988892 }, { "compression/movement_sparsity/importance_regularization_factor": 0.005059012307081834, "compression/movement_sparsity/importance_threshold": -0.006968302838943197, "compression/movement_sparsity/linear_layer_sparsity": 0.0010000918637874667, "compression/movement_sparsity/model_sparsity": 0.000965735631623931, "compression_loss": 0.546565055847168, "distillation_loss": 0.4787403345108032, "epoch": 2.01, "learning_rate": 4.441157133464826e-05, "loss": 1.0266, "step": 2373, "task_loss": 0.730780303478241 }, { "compression/movement_sparsity/importance_regularization_factor": 0.005900518595452042, "compression/movement_sparsity/importance_threshold": -0.006962409152050433, "compression/movement_sparsity/linear_layer_sparsity": 0.0010214599721909332, "compression/movement_sparsity/model_sparsity": 0.0009863696797677481, "compression_loss": 0.6374796032905579, "distillation_loss": 0.27051985263824463, "epoch": 2.01, "learning_rate": 4.440687517610595e-05, "loss": 1.1848, "step": 2374, "task_loss": 0.10961371660232544 }, { "compression/movement_sparsity/importance_regularization_factor": 0.006741550260998652, "compression/movement_sparsity/importance_threshold": -0.006956518789290066, "compression/movement_sparsity/linear_layer_sparsity": 0.001030617732935276, "compression/movement_sparsity/model_sparsity": 0.0009952128432579555, "compression_loss": 0.7283419966697693, "distillation_loss": 0.4993123412132263, "epoch": 2.01, "learning_rate": 4.440217901756363e-05, "loss": 1.1853, "step": 2375, "task_loss": 0.709650993347168 }, { "compression/movement_sparsity/importance_regularization_factor": 0.007582107437607011, "compression/movement_sparsity/importance_threshold": -0.006950631749724397, "compression/movement_sparsity/linear_layer_sparsity": 0.001033670319850057, "compression/movement_sparsity/model_sparsity": 0.0009981605644213578, "compression_loss": 0.8191527724266052, "distillation_loss": 0.37680327892303467, "epoch": 2.01, "learning_rate": 4.4397482859021324e-05, "loss": 1.2489, "step": 2376, "task_loss": 0.5057814717292786 }, { "compression/movement_sparsity/importance_regularization_factor": 0.008422190259162132, "compression/movement_sparsity/importance_threshold": -0.006944748032415731, "compression/movement_sparsity/linear_layer_sparsity": 0.0010489451785915974, "compression/movement_sparsity/model_sparsity": 0.0010129106847741646, "compression_loss": 0.9099125266075134, "distillation_loss": 0.48812663555145264, "epoch": 2.01, "learning_rate": 4.439278670047901e-05, "loss": 1.4283, "step": 2377, "task_loss": 1.0213834047317505 }, { "compression/movement_sparsity/importance_regularization_factor": 0.009261798859549142, "compression/movement_sparsity/importance_threshold": -0.006938867636426372, "compression/movement_sparsity/linear_layer_sparsity": 0.0010458925916768166, "compression/movement_sparsity/model_sparsity": 0.0010099629636107622, "compression_loss": 1.000620722770691, "distillation_loss": 0.5056247115135193, "epoch": 2.01, "learning_rate": 4.43880905419367e-05, "loss": 1.5263, "step": 2378, "task_loss": 0.05860638618469238 }, { "compression/movement_sparsity/importance_regularization_factor": 0.010100933372653165, "compression/movement_sparsity/importance_threshold": -0.006932990560818625, "compression/movement_sparsity/linear_layer_sparsity": 0.0010459045158444523, "compression/movement_sparsity/model_sparsity": 0.0010099744781465567, "compression_loss": 1.0912773609161377, "distillation_loss": 0.375686913728714, "epoch": 2.01, "learning_rate": 4.438339438339438e-05, "loss": 1.651, "step": 2379, "task_loss": 0.32050254940986633 }, { "compression/movement_sparsity/importance_regularization_factor": 0.010939593932359548, "compression/movement_sparsity/importance_threshold": -0.006927116804654792, "compression/movement_sparsity/linear_layer_sparsity": 0.0010550622765887951, "compression/movement_sparsity/model_sparsity": 0.001018817641636764, "compression_loss": 1.1818833351135254, "distillation_loss": 0.3442913889884949, "epoch": 2.01, "learning_rate": 4.437869822485207e-05, "loss": 1.5776, "step": 2380, "task_loss": 0.4802236258983612 }, { "compression/movement_sparsity/importance_regularization_factor": 0.011777780672553195, "compression/movement_sparsity/importance_threshold": -0.006921246366997178, "compression/movement_sparsity/linear_layer_sparsity": 0.001067272624247919, "compression/movement_sparsity/model_sparsity": 0.0010306085262903738, "compression_loss": 1.272438406944275, "distillation_loss": 0.5577852725982666, "epoch": 2.01, "learning_rate": 4.437400206630976e-05, "loss": 1.8478, "step": 2381, "task_loss": 1.0157655477523804 }, { "compression/movement_sparsity/importance_regularization_factor": 0.012615493727119342, "compression/movement_sparsity/importance_threshold": -0.006915379246908087, "compression/movement_sparsity/linear_layer_sparsity": 0.0010489690269268692, "compression/movement_sparsity/model_sparsity": 0.0010129337138457537, "compression_loss": 1.3629417419433594, "distillation_loss": 0.3947383761405945, "epoch": 2.01, "learning_rate": 4.436930590776745e-05, "loss": 1.8274, "step": 2382, "task_loss": 0.8813773989677429 }, { "compression/movement_sparsity/importance_regularization_factor": 0.013452733229942782, "compression/movement_sparsity/importance_threshold": -0.006909515443449825, "compression/movement_sparsity/linear_layer_sparsity": 0.001048980951094505, "compression/movement_sparsity/model_sparsity": 0.0010129452283815484, "compression_loss": 1.4533934593200684, "distillation_loss": 0.5739043354988098, "epoch": 2.01, "learning_rate": 4.4364609749225135e-05, "loss": 1.9145, "step": 2383, "task_loss": 0.46072179079055786 }, { "compression/movement_sparsity/importance_regularization_factor": 0.014289499314909304, "compression/movement_sparsity/importance_threshold": -0.006903654955684692, "compression/movement_sparsity/linear_layer_sparsity": 0.0010520454621769219, "compression/movement_sparsity/model_sparsity": 0.0010159044640807452, "compression_loss": 1.5437926054000854, "distillation_loss": 0.7259434461593628, "epoch": 2.02, "learning_rate": 4.435991359068282e-05, "loss": 2.0552, "step": 2384, "task_loss": 0.8381109237670898 }, { "compression/movement_sparsity/importance_regularization_factor": 0.015125792115903702, "compression/movement_sparsity/importance_threshold": -0.006897797782674995, "compression/movement_sparsity/linear_layer_sparsity": 0.0010550980490917027, "compression/movement_sparsity/model_sparsity": 0.0010188521852441477, "compression_loss": 1.6341404914855957, "distillation_loss": 0.35940682888031006, "epoch": 2.02, "learning_rate": 4.4355217432140514e-05, "loss": 2.1314, "step": 2385, "task_loss": 0.46927493810653687 }, { "compression/movement_sparsity/importance_regularization_factor": 0.015961611766811212, "compression/movement_sparsity/importance_threshold": -0.006891943923483036, "compression/movement_sparsity/linear_layer_sparsity": 0.0010596769294638742, "compression/movement_sparsity/model_sparsity": 0.0010232737669892513, "compression_loss": 1.7244375944137573, "distillation_loss": 0.4708237051963806, "epoch": 2.02, "learning_rate": 4.43505212735982e-05, "loss": 2.2354, "step": 2386, "task_loss": 0.7868325710296631 }, { "compression/movement_sparsity/importance_regularization_factor": 0.01679695840151696, "compression/movement_sparsity/importance_threshold": -0.006886093377171118, "compression/movement_sparsity/linear_layer_sparsity": 0.001062729516378655, "compression/movement_sparsity/model_sparsity": 0.0010262214881526538, "compression_loss": 1.8146816492080688, "distillation_loss": 0.546553373336792, "epoch": 2.02, "learning_rate": 4.434582511505589e-05, "loss": 2.252, "step": 2387, "task_loss": 0.6013540625572205 }, { "compression/movement_sparsity/importance_regularization_factor": 0.017631832153905957, "compression/movement_sparsity/importance_threshold": -0.00688024614280155, "compression/movement_sparsity/linear_layer_sparsity": 0.0010840976247821217, "compression/movement_sparsity/model_sparsity": 0.001046855536296471, "compression_loss": 1.9048746824264526, "distillation_loss": 0.3215025067329407, "epoch": 2.02, "learning_rate": 4.434112895651357e-05, "loss": 2.4363, "step": 2388, "task_loss": 0.3925120234489441 }, { "compression/movement_sparsity/importance_regularization_factor": 0.018466233157863443, "compression/movement_sparsity/importance_threshold": -0.00687440221943663, "compression/movement_sparsity/linear_layer_sparsity": 0.0011054895815208598, "compression/movement_sparsity/model_sparsity": 0.0010675126135118772, "compression_loss": 1.9950146675109863, "distillation_loss": 0.38974276185035706, "epoch": 2.02, "learning_rate": 4.433643279797126e-05, "loss": 2.5338, "step": 2389, "task_loss": 1.1412025690078735 }, { "compression/movement_sparsity/importance_regularization_factor": 0.01930016154727454, "compression/movement_sparsity/importance_threshold": -0.006868561606138667, "compression/movement_sparsity/linear_layer_sparsity": 0.0011085421684356408, "compression/movement_sparsity/model_sparsity": 0.0010704603346752795, "compression_loss": 2.085103750228882, "distillation_loss": 0.42029839754104614, "epoch": 2.02, "learning_rate": 4.433173663942895e-05, "loss": 2.6132, "step": 2390, "task_loss": 0.801195502281189 }, { "compression/movement_sparsity/importance_regularization_factor": 0.02013361745602449, "compression/movement_sparsity/importance_threshold": -0.006862724301969961, "compression/movement_sparsity/linear_layer_sparsity": 0.0011117855420325956, "compression/movement_sparsity/model_sparsity": 0.0010735922884113948, "compression_loss": 2.175140142440796, "distillation_loss": 0.45288002490997314, "epoch": 2.02, "learning_rate": 4.432704048088664e-05, "loss": 2.6946, "step": 2391, "task_loss": 0.30602532625198364 }, { "compression/movement_sparsity/importance_regularization_factor": 0.020966601017998188, "compression/movement_sparsity/importance_threshold": -0.006856890305992819, "compression/movement_sparsity/linear_layer_sparsity": 0.0011180815025443312, "compression/movement_sparsity/model_sparsity": 0.001079671963310912, "compression_loss": 2.265123128890991, "distillation_loss": 0.2598922848701477, "epoch": 2.02, "learning_rate": 4.4322344322344325e-05, "loss": 2.8098, "step": 2392, "task_loss": 0.5974996089935303 }, { "compression/movement_sparsity/importance_regularization_factor": 0.021799112367080653, "compression/movement_sparsity/importance_threshold": -0.006851059617269545, "compression/movement_sparsity/linear_layer_sparsity": 0.0011303037743710907, "compression/movement_sparsity/model_sparsity": 0.0010914743625003165, "compression_loss": 2.355052947998047, "distillation_loss": 0.4024933874607086, "epoch": 2.02, "learning_rate": 4.431764816380201e-05, "loss": 2.9442, "step": 2393, "task_loss": 0.13177450001239777 }, { "compression/movement_sparsity/importance_regularization_factor": 0.022631151637157676, "compression/movement_sparsity/importance_threshold": -0.006845232234862439, "compression/movement_sparsity/linear_layer_sparsity": 0.001154915256371512, "compression/movement_sparsity/model_sparsity": 0.0011152403643802486, "compression_loss": 2.4449310302734375, "distillation_loss": 0.52522873878479, "epoch": 2.02, "learning_rate": 4.43129520052597e-05, "loss": 3.0102, "step": 2394, "task_loss": 1.1190540790557861 }, { "compression/movement_sparsity/importance_regularization_factor": 0.023462718962113938, "compression/movement_sparsity/importance_threshold": -0.006839408157833809, "compression/movement_sparsity/linear_layer_sparsity": 0.0011705597643097644, "compression/movement_sparsity/model_sparsity": 0.0011303474353426862, "compression_loss": 2.5347583293914795, "distillation_loss": 0.5930206775665283, "epoch": 2.02, "learning_rate": 4.430825584671739e-05, "loss": 3.0029, "step": 2395, "task_loss": 1.391053318977356 }, { "compression/movement_sparsity/importance_regularization_factor": 0.024293814475834785, "compression/movement_sparsity/importance_threshold": -0.006833587385245956, "compression/movement_sparsity/linear_layer_sparsity": 0.0011829608986510618, "compression/movement_sparsity/model_sparsity": 0.0011423225525690086, "compression_loss": 2.6245336532592773, "distillation_loss": 0.4650835394859314, "epoch": 2.03, "learning_rate": 4.430355968817507e-05, "loss": 3.0461, "step": 2396, "task_loss": 0.8159640431404114 }, { "compression/movement_sparsity/importance_regularization_factor": 0.025124438312205122, "compression/movement_sparsity/importance_threshold": -0.006827769916161187, "compression/movement_sparsity/linear_layer_sparsity": 0.0011730399911780238, "compression/movement_sparsity/model_sparsity": 0.0011327424587879507, "compression_loss": 2.714254140853882, "distillation_loss": 0.5791108012199402, "epoch": 2.03, "learning_rate": 4.4298863529632764e-05, "loss": 3.2754, "step": 2397, "task_loss": 0.6679997444152832 }, { "compression/movement_sparsity/importance_regularization_factor": 0.025954590605110184, "compression/movement_sparsity/importance_threshold": -0.006821955749641805, "compression/movement_sparsity/linear_layer_sparsity": 0.0012121512610236546, "compression/movement_sparsity/model_sparsity": 0.0011705101361940446, "compression_loss": 2.803925037384033, "distillation_loss": 0.4154004752635956, "epoch": 2.03, "learning_rate": 4.429416737109045e-05, "loss": 3.2209, "step": 2398, "task_loss": 1.2762163877487183 }, { "compression/movement_sparsity/importance_regularization_factor": 0.026784271488435207, "compression/movement_sparsity/importance_threshold": -0.006816144884750112, "compression/movement_sparsity/linear_layer_sparsity": 0.0012337101561092948, "compression/movement_sparsity/model_sparsity": 0.0011913284169105744, "compression_loss": 2.8935439586639404, "distillation_loss": 0.7611942887306213, "epoch": 2.03, "learning_rate": 4.4289471212548136e-05, "loss": 3.3399, "step": 2399, "task_loss": 1.2041484117507935 }, { "compression/movement_sparsity/importance_regularization_factor": 0.027613481096065207, "compression/movement_sparsity/importance_threshold": -0.006810337320548415, "compression/movement_sparsity/linear_layer_sparsity": 0.0012401969033032042, "compression/movement_sparsity/model_sparsity": 0.0011975923243828044, "compression_loss": 2.983110189437866, "distillation_loss": 0.548946738243103, "epoch": 2.03, "learning_rate": 4.428477505400582e-05, "loss": 3.5766, "step": 2400, "task_loss": 1.5580856800079346 }, { "compression/movement_sparsity/importance_regularization_factor": 0.028442219561885307, "compression/movement_sparsity/importance_threshold": -0.006804533056099016, "compression/movement_sparsity/linear_layer_sparsity": 0.0012648203094712614, "compression/movement_sparsity/model_sparsity": 0.0012213698407985312, "compression_loss": 3.0726253986358643, "distillation_loss": 0.3444945812225342, "epoch": 2.03, "learning_rate": 4.428007889546351e-05, "loss": 3.5869, "step": 2401, "task_loss": 0.8993453979492188 }, { "compression/movement_sparsity/importance_regularization_factor": 0.029270487019780744, "compression/movement_sparsity/importance_threshold": -0.0067987320904642195, "compression/movement_sparsity/linear_layer_sparsity": 0.0012730241368047353, "compression/movement_sparsity/model_sparsity": 0.0012292918414251754, "compression_loss": 3.16208815574646, "distillation_loss": 0.7511818408966064, "epoch": 2.03, "learning_rate": 4.42753827369212e-05, "loss": 3.6665, "step": 2402, "task_loss": 0.9963592290878296 }, { "compression/movement_sparsity/importance_regularization_factor": 0.03009828360363631, "compression/movement_sparsity/importance_threshold": -0.006792934422706331, "compression/movement_sparsity/linear_layer_sparsity": 0.0012846502002497016, "compression/movement_sparsity/model_sparsity": 0.0012405185138248526, "compression_loss": 3.251498222351074, "distillation_loss": 0.2715146541595459, "epoch": 2.03, "learning_rate": 4.427068657837889e-05, "loss": 3.6994, "step": 2403, "task_loss": 0.32863888144493103 }, { "compression/movement_sparsity/importance_regularization_factor": 0.03092560944733791, "compression/movement_sparsity/importance_threshold": -0.006787140051887651, "compression/movement_sparsity/linear_layer_sparsity": 0.001308307748839254, "compression/movement_sparsity/model_sparsity": 0.0012633633528412215, "compression_loss": 3.3408586978912354, "distillation_loss": 0.6217576265335083, "epoch": 2.03, "learning_rate": 4.4265990419836575e-05, "loss": 3.8628, "step": 2404, "task_loss": 1.1299502849578857 }, { "compression/movement_sparsity/importance_regularization_factor": 0.03175246468477011, "compression/movement_sparsity/importance_threshold": -0.006781348977070486, "compression/movement_sparsity/linear_layer_sparsity": 0.0013352205951933965, "compression/movement_sparsity/model_sparsity": 0.0012893516601295, "compression_loss": 3.4301633834838867, "distillation_loss": 0.437610387802124, "epoch": 2.03, "learning_rate": 4.426129426129426e-05, "loss": 3.8313, "step": 2405, "task_loss": 0.2818341851234436 }, { "compression/movement_sparsity/importance_regularization_factor": 0.03257884944981815, "compression/movement_sparsity/importance_threshold": -0.0067755611973171394, "compression/movement_sparsity/linear_layer_sparsity": 0.001357733423689906, "compression/movement_sparsity/model_sparsity": 0.0013110911037095931, "compression_loss": 3.5194201469421387, "distillation_loss": 0.5858157277107239, "epoch": 2.03, "learning_rate": 4.425659810275195e-05, "loss": 3.9596, "step": 2406, "task_loss": 0.39481663703918457 }, { "compression/movement_sparsity/importance_regularization_factor": 0.03340476387636726, "compression/movement_sparsity/importance_threshold": -0.006769776711689915, "compression/movement_sparsity/linear_layer_sparsity": 0.0013873172835944822, "compression/movement_sparsity/model_sparsity": 0.0013396586670158489, "compression_loss": 3.6086208820343018, "distillation_loss": 0.17848718166351318, "epoch": 2.03, "learning_rate": 4.425190194420964e-05, "loss": 4.0177, "step": 2407, "task_loss": 0.959834635257721 }, { "compression/movement_sparsity/importance_regularization_factor": 0.03423020809830246, "compression/movement_sparsity/importance_threshold": -0.006763995519251117, "compression/movement_sparsity/linear_layer_sparsity": 0.0014109748321840343, "compression/movement_sparsity/model_sparsity": 0.0013625035060322178, "compression_loss": 3.6977672576904297, "distillation_loss": 0.600239634513855, "epoch": 2.04, "learning_rate": 4.424720578566733e-05, "loss": 4.1921, "step": 2408, "task_loss": 1.0775688886642456 }, { "compression/movement_sparsity/importance_regularization_factor": 0.03505518224950899, "compression/movement_sparsity/importance_threshold": -0.0067582176190630495, "compression/movement_sparsity/linear_layer_sparsity": 0.0014487625194220843, "compression/movement_sparsity/model_sparsity": 0.0013989930699651175, "compression_loss": 3.78686261177063, "distillation_loss": 0.6846114993095398, "epoch": 2.04, "learning_rate": 4.4242509627125013e-05, "loss": 4.2744, "step": 2409, "task_loss": 0.30470189452171326 }, { "compression/movement_sparsity/importance_regularization_factor": 0.03587968646387196, "compression/movement_sparsity/importance_threshold": -0.006752443010188016, "compression/movement_sparsity/linear_layer_sparsity": 0.0014741371481512007, "compression/movement_sparsity/model_sparsity": 0.0014234960021359004, "compression_loss": 3.8759052753448486, "distillation_loss": 0.4664623439311981, "epoch": 2.04, "learning_rate": 4.42378134685827e-05, "loss": 4.319, "step": 2410, "task_loss": 0.4815807342529297 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0367037208752764, "compression/movement_sparsity/importance_threshold": -0.006746671691688321, "compression/movement_sparsity/linear_layer_sparsity": 0.0015254587656559552, "compression/movement_sparsity/model_sparsity": 0.001473054564195604, "compression_loss": 3.9648962020874023, "distillation_loss": 0.35947346687316895, "epoch": 2.04, "learning_rate": 4.423311731004039e-05, "loss": 4.5273, "step": 2411, "task_loss": 1.190804123878479 }, { "compression/movement_sparsity/importance_regularization_factor": 0.03752728561760765, "compression/movement_sparsity/importance_threshold": -0.0067409036626262684, "compression/movement_sparsity/linear_layer_sparsity": 0.0015901473750805119, "compression/movement_sparsity/model_sparsity": 0.0015355209208809876, "compression_loss": 4.053835391998291, "distillation_loss": 0.731823742389679, "epoch": 2.04, "learning_rate": 4.422842115149808e-05, "loss": 4.4758, "step": 2412, "task_loss": 0.2713839113712311 }, { "compression/movement_sparsity/importance_regularization_factor": 0.038350380824750396, "compression/movement_sparsity/importance_threshold": -0.0067351389220641645, "compression/movement_sparsity/linear_layer_sparsity": 0.0016405269833420332, "compression/movement_sparsity/model_sparsity": 0.0015841698346129227, "compression_loss": 4.1427226066589355, "distillation_loss": 0.279991090297699, "epoch": 2.04, "learning_rate": 4.422372499295576e-05, "loss": 4.6938, "step": 2413, "task_loss": 0.4196307957172394 }, { "compression/movement_sparsity/importance_regularization_factor": 0.03917300663059042, "compression/movement_sparsity/importance_threshold": -0.006729377469064308, "compression/movement_sparsity/linear_layer_sparsity": 0.0016901315207072233, "compression/movement_sparsity/model_sparsity": 0.0016320703035182123, "compression_loss": 4.231555938720703, "distillation_loss": 0.7195446491241455, "epoch": 2.04, "learning_rate": 4.421902883441345e-05, "loss": 4.6808, "step": 2414, "task_loss": 0.7009245157241821 }, { "compression/movement_sparsity/importance_regularization_factor": 0.03999516316901264, "compression/movement_sparsity/importance_threshold": -0.006723619302689006, "compression/movement_sparsity/linear_layer_sparsity": 0.0017322953774676349, "compression/movement_sparsity/model_sparsity": 0.0016727857020877085, "compression_loss": 4.320333957672119, "distillation_loss": 0.48594576120376587, "epoch": 2.04, "learning_rate": 4.421433267587114e-05, "loss": 4.8165, "step": 2415, "task_loss": 0.350453644990921 }, { "compression/movement_sparsity/importance_regularization_factor": 0.04081685057390205, "compression/movement_sparsity/importance_threshold": -0.006717864422000563, "compression/movement_sparsity/linear_layer_sparsity": 0.0017637751800263133, "compression/movement_sparsity/model_sparsity": 0.0017031840765852962, "compression_loss": 4.409062385559082, "distillation_loss": 0.6417086720466614, "epoch": 2.04, "learning_rate": 4.420963651732883e-05, "loss": 4.9643, "step": 2416, "task_loss": 0.700954020023346 }, { "compression/movement_sparsity/importance_regularization_factor": 0.041638068979143905, "compression/movement_sparsity/importance_threshold": -0.006712112826061281, "compression/movement_sparsity/linear_layer_sparsity": 0.0018170046643528057, "compression/movement_sparsity/model_sparsity": 0.0017545849643721264, "compression_loss": 4.497735500335693, "distillation_loss": 0.441211998462677, "epoch": 2.04, "learning_rate": 4.420494035878652e-05, "loss": 4.9931, "step": 2417, "task_loss": 0.8304980397224426 }, { "compression/movement_sparsity/importance_regularization_factor": 0.04245881851862332, "compression/movement_sparsity/importance_threshold": -0.006706364513933466, "compression/movement_sparsity/linear_layer_sparsity": 0.0018530633472836555, "compression/movement_sparsity/model_sparsity": 0.0017894049206148178, "compression_loss": 4.586360931396484, "distillation_loss": 0.527942419052124, "epoch": 2.04, "learning_rate": 4.4200244200244204e-05, "loss": 5.2119, "step": 2418, "task_loss": 0.41304224729537964 }, { "compression/movement_sparsity/importance_regularization_factor": 0.04327909932622542, "compression/movement_sparsity/importance_threshold": -0.006700619484679421, "compression/movement_sparsity/linear_layer_sparsity": 0.001922318912912748, "compression/movement_sparsity/model_sparsity": 0.0018562813445095108, "compression_loss": 4.674930095672607, "distillation_loss": 0.3811076581478119, "epoch": 2.04, "learning_rate": 4.419554804170189e-05, "loss": 5.1897, "step": 2419, "task_loss": 0.12234149873256683 }, { "compression/movement_sparsity/importance_regularization_factor": 0.04409891153583545, "compression/movement_sparsity/importance_threshold": -0.00669487773736145, "compression/movement_sparsity/linear_layer_sparsity": 0.0019389054300942333, "compression/movement_sparsity/model_sparsity": 0.001872298063799717, "compression_loss": 4.763443946838379, "distillation_loss": 0.5080851316452026, "epoch": 2.05, "learning_rate": 4.419085188315958e-05, "loss": 5.2327, "step": 2420, "task_loss": 0.8528552651405334 }, { "compression/movement_sparsity/importance_regularization_factor": 0.044918255281338304, "compression/movement_sparsity/importance_threshold": -0.006689139271041857, "compression/movement_sparsity/linear_layer_sparsity": 0.0019370094874401312, "compression/movement_sparsity/model_sparsity": 0.001870467252608385, "compression_loss": 4.85191011428833, "distillation_loss": 0.45705446600914, "epoch": 2.05, "learning_rate": 4.418615572461727e-05, "loss": 5.3223, "step": 2421, "task_loss": 0.7068832516670227 }, { "compression/movement_sparsity/importance_regularization_factor": 0.04573713069661933, "compression/movement_sparsity/importance_threshold": -0.006683404084782946, "compression/movement_sparsity/linear_layer_sparsity": 0.0019793760550503524, "compression/movement_sparsity/model_sparsity": 0.0019113783982863883, "compression_loss": 4.940328598022461, "distillation_loss": 0.6961550712585449, "epoch": 2.05, "learning_rate": 4.418145956607495e-05, "loss": 5.3905, "step": 2422, "task_loss": 1.6397305727005005 }, { "compression/movement_sparsity/importance_regularization_factor": 0.04655553791556333, "compression/movement_sparsity/importance_threshold": -0.006677672177647024, "compression/movement_sparsity/linear_layer_sparsity": 0.0020410001533924926, "compression/movement_sparsity/model_sparsity": 0.0019708855192725754, "compression_loss": 5.0286865234375, "distillation_loss": 0.5057326555252075, "epoch": 2.05, "learning_rate": 4.417676340753264e-05, "loss": 5.4762, "step": 2423, "task_loss": 0.2978403568267822 }, { "compression/movement_sparsity/importance_regularization_factor": 0.04737347707205597, "compression/movement_sparsity/importance_threshold": -0.006671943548696389, "compression/movement_sparsity/linear_layer_sparsity": 0.0021424986683089586, "compression/movement_sparsity/model_sparsity": 0.0020688972479557064, "compression_loss": 5.116994857788086, "distillation_loss": 0.4109514653682709, "epoch": 2.05, "learning_rate": 4.417206724899033e-05, "loss": 5.6792, "step": 2424, "task_loss": 0.4336155652999878 }, { "compression/movement_sparsity/importance_regularization_factor": 0.04819094829998227, "compression/movement_sparsity/importance_threshold": -0.006666218196993348, "compression/movement_sparsity/linear_layer_sparsity": 0.00223674728930282, "compression/movement_sparsity/model_sparsity": 0.0021599081388757567, "compression_loss": 5.205246448516846, "distillation_loss": 0.7307251691818237, "epoch": 2.05, "learning_rate": 4.4167371090448015e-05, "loss": 5.8809, "step": 2425, "task_loss": 0.9712884426116943 }, { "compression/movement_sparsity/importance_regularization_factor": 0.04900795173322714, "compression/movement_sparsity/importance_threshold": -0.006660496121600205, "compression/movement_sparsity/linear_layer_sparsity": 0.002262503491396284, "compression/movement_sparsity/model_sparsity": 0.002184779536191965, "compression_loss": 5.293447971343994, "distillation_loss": 0.7137432098388672, "epoch": 2.05, "learning_rate": 4.41626749319057e-05, "loss": 5.8125, "step": 2426, "task_loss": 0.23793554306030273 }, { "compression/movement_sparsity/importance_regularization_factor": 0.04982448750567581, "compression/movement_sparsity/importance_threshold": -0.006654777321579265, "compression/movement_sparsity/linear_layer_sparsity": 0.0022905729820111055, "compression/movement_sparsity/model_sparsity": 0.002211884753452314, "compression_loss": 5.381595134735107, "distillation_loss": 0.4867357909679413, "epoch": 2.05, "learning_rate": 4.415797877336339e-05, "loss": 5.7374, "step": 2427, "task_loss": 0.5562476515769958 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05064055575121351, "compression/movement_sparsity/importance_threshold": -0.00664906179599283, "compression/movement_sparsity/linear_layer_sparsity": 0.002354486520539331, "compression/movement_sparsity/model_sparsity": 0.0022736026653110528, "compression_loss": 5.469688415527344, "distillation_loss": 0.44602248072624207, "epoch": 2.05, "learning_rate": 4.415328261482108e-05, "loss": 5.8707, "step": 2428, "task_loss": 0.3602611720561981 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05145615660372527, "compression/movement_sparsity/importance_threshold": -0.006643349543903205, "compression/movement_sparsity/linear_layer_sparsity": 0.0023977950973927856, "compression/movement_sparsity/model_sparsity": 0.002315423459316825, "compression_loss": 5.557729721069336, "distillation_loss": 0.4859420359134674, "epoch": 2.05, "learning_rate": 4.414858645627877e-05, "loss": 6.0061, "step": 2429, "task_loss": 0.7623806595802307 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05227129019709631, "compression/movement_sparsity/importance_threshold": -0.006637640564372694, "compression/movement_sparsity/linear_layer_sparsity": 0.002436512869706433, "compression/movement_sparsity/model_sparsity": 0.002352811157041699, "compression_loss": 5.6457200050354, "distillation_loss": 0.5332766175270081, "epoch": 2.05, "learning_rate": 4.4143890297736454e-05, "loss": 6.1881, "step": 2430, "task_loss": 1.1421220302581787 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05308595666521165, "compression/movement_sparsity/importance_threshold": -0.006631934856463601, "compression/movement_sparsity/linear_layer_sparsity": 0.0025361154419687964, "compression/movement_sparsity/model_sparsity": 0.002448992074533498, "compression_loss": 5.73365592956543, "distillation_loss": 0.47381317615509033, "epoch": 2.05, "learning_rate": 4.413919413919414e-05, "loss": 6.2071, "step": 2431, "task_loss": 0.6038297414779663 }, { "compression/movement_sparsity/importance_regularization_factor": 0.053900156141956646, "compression/movement_sparsity/importance_threshold": -0.006626232419238229, "compression/movement_sparsity/linear_layer_sparsity": 0.002566068951070084, "compression/movement_sparsity/model_sparsity": 0.0024779165884493844, "compression_loss": 5.821540355682373, "distillation_loss": 0.4869072437286377, "epoch": 2.06, "learning_rate": 4.4134497980651826e-05, "loss": 6.3329, "step": 2432, "task_loss": 0.49263009428977966 }, { "compression/movement_sparsity/importance_regularization_factor": 0.054713888761215856, "compression/movement_sparsity/importance_threshold": -0.006620533251758887, "compression/movement_sparsity/linear_layer_sparsity": 0.0025939238066674604, "compression/movement_sparsity/model_sparsity": 0.0025048145440654318, "compression_loss": 5.909368991851807, "distillation_loss": 0.4851858913898468, "epoch": 2.06, "learning_rate": 4.412980182210952e-05, "loss": 6.3865, "step": 2433, "task_loss": 1.0558338165283203 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0555271546568753, "compression/movement_sparsity/importance_threshold": -0.006614837353087871, "compression/movement_sparsity/linear_layer_sparsity": 0.0026494665795153104, "compression/movement_sparsity/model_sparsity": 0.0025584492517964027, "compression_loss": 5.997147083282471, "distillation_loss": 0.39389896392822266, "epoch": 2.06, "learning_rate": 4.4125105663567206e-05, "loss": 6.4042, "step": 2434, "task_loss": 0.4758550822734833 }, { "compression/movement_sparsity/importance_regularization_factor": 0.056339953962819544, "compression/movement_sparsity/importance_threshold": -0.006609144722287489, "compression/movement_sparsity/linear_layer_sparsity": 0.0027419861962019714, "compression/movement_sparsity/model_sparsity": 0.002647790535026245, "compression_loss": 6.08487606048584, "distillation_loss": 0.6093442440032959, "epoch": 2.06, "learning_rate": 4.412040950502489e-05, "loss": 6.6246, "step": 2435, "task_loss": 1.1199067831039429 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05715228681293394, "compression/movement_sparsity/importance_threshold": -0.006603455358420046, "compression/movement_sparsity/linear_layer_sparsity": 0.0028184678074183967, "compression/movement_sparsity/model_sparsity": 0.0027216447676124294, "compression_loss": 6.172552108764648, "distillation_loss": 0.7560376524925232, "epoch": 2.06, "learning_rate": 4.411571334648258e-05, "loss": 6.6906, "step": 2436, "task_loss": 0.9296551942825317 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05796415334110361, "compression/movement_sparsity/importance_threshold": -0.006597769260547844, "compression/movement_sparsity/linear_layer_sparsity": 0.002855671210442289, "compression/movement_sparsity/model_sparsity": 0.002757570119291397, "compression_loss": 6.26017427444458, "distillation_loss": 0.7523699998855591, "epoch": 2.06, "learning_rate": 4.4111017187940265e-05, "loss": 6.8076, "step": 2437, "task_loss": 1.1962324380874634 }, { "compression/movement_sparsity/importance_regularization_factor": 0.058775553681213566, "compression/movement_sparsity/importance_threshold": -0.006592086427733187, "compression/movement_sparsity/linear_layer_sparsity": 0.002935420043590941, "compression/movement_sparsity/model_sparsity": 0.002834579334685286, "compression_loss": 6.347743511199951, "distillation_loss": 0.5805315971374512, "epoch": 2.06, "learning_rate": 4.410632102939796e-05, "loss": 6.8751, "step": 2438, "task_loss": 0.4322480857372284 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05958648796714905, "compression/movement_sparsity/importance_threshold": -0.006586406859038381, "compression/movement_sparsity/linear_layer_sparsity": 0.0030592525244893787, "compression/movement_sparsity/model_sparsity": 0.0029541577889115917, "compression_loss": 6.435264587402344, "distillation_loss": 0.42431050539016724, "epoch": 2.06, "learning_rate": 4.410162487085564e-05, "loss": 6.9902, "step": 2439, "task_loss": 0.42653849720954895 }, { "compression/movement_sparsity/importance_regularization_factor": 0.06039695633279518, "compression/movement_sparsity/importance_threshold": -0.006580730553525728, "compression/movement_sparsity/linear_layer_sparsity": 0.0031496854118397634, "compression/movement_sparsity/model_sparsity": 0.003041484028377389, "compression_loss": 6.522731304168701, "distillation_loss": 0.5754430890083313, "epoch": 2.06, "learning_rate": 4.409692871231333e-05, "loss": 7.0934, "step": 2440, "task_loss": 0.36451107263565063 }, { "compression/movement_sparsity/importance_regularization_factor": 0.06120695891203709, "compression/movement_sparsity/importance_threshold": -0.006575057510257533, "compression/movement_sparsity/linear_layer_sparsity": 0.003232486831903196, "compression/movement_sparsity/model_sparsity": 0.0031214409649346804, "compression_loss": 6.610144138336182, "distillation_loss": 0.2555873692035675, "epoch": 2.06, "learning_rate": 4.409223255377102e-05, "loss": 6.9159, "step": 2441, "task_loss": 0.25501060485839844 }, { "compression/movement_sparsity/importance_regularization_factor": 0.06201649583876001, "compression/movement_sparsity/importance_threshold": -0.006569387728296099, "compression/movement_sparsity/linear_layer_sparsity": 0.0033410444540600934, "compression/movement_sparsity/model_sparsity": 0.00322626929880818, "compression_loss": 6.6974992752075195, "distillation_loss": 0.439372181892395, "epoch": 2.06, "learning_rate": 4.408753639522871e-05, "loss": 7.1195, "step": 2442, "task_loss": 0.6739282608032227 }, { "compression/movement_sparsity/importance_regularization_factor": 0.06282556724684862, "compression/movement_sparsity/importance_threshold": -0.006563721206703733, "compression/movement_sparsity/linear_layer_sparsity": 0.003441767898080228, "compression/movement_sparsity/model_sparsity": 0.0033235325826646662, "compression_loss": 6.784798622131348, "distillation_loss": 0.631488561630249, "epoch": 2.07, "learning_rate": 4.408284023668639e-05, "loss": 7.153, "step": 2443, "task_loss": 0.38315266370773315 }, { "compression/movement_sparsity/importance_regularization_factor": 0.06363417327018872, "compression/movement_sparsity/importance_threshold": -0.006558057944542735, "compression/movement_sparsity/linear_layer_sparsity": 0.0035547255380947583, "compression/movement_sparsity/model_sparsity": 0.003432609780246351, "compression_loss": 6.872047424316406, "distillation_loss": 0.8471643328666687, "epoch": 2.07, "learning_rate": 4.4078144078144076e-05, "loss": 7.432, "step": 2444, "task_loss": 0.9422128796577454 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0644423140426652, "compression/movement_sparsity/importance_threshold": -0.00655239794087541, "compression/movement_sparsity/linear_layer_sparsity": 0.0037258611920046644, "compression/movement_sparsity/model_sparsity": 0.0035978663979696003, "compression_loss": 6.9592437744140625, "distillation_loss": 0.4266647696495056, "epoch": 2.07, "learning_rate": 4.407344791960177e-05, "loss": 7.4334, "step": 2445, "task_loss": 0.20151148736476898 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0652499896981632, "compression/movement_sparsity/importance_threshold": -0.0065467411947640625, "compression/movement_sparsity/linear_layer_sparsity": 0.0038073390294605164, "compression/movement_sparsity/model_sparsity": 0.0036765452210536977, "compression_loss": 7.046391010284424, "distillation_loss": 0.44936418533325195, "epoch": 2.07, "learning_rate": 4.4068751761059455e-05, "loss": 7.6431, "step": 2446, "task_loss": 0.68747878074646 }, { "compression/movement_sparsity/importance_regularization_factor": 0.06605720037056773, "compression/movement_sparsity/importance_threshold": -0.006541087705270998, "compression/movement_sparsity/linear_layer_sparsity": 0.003908658681862445, "compression/movement_sparsity/model_sparsity": 0.003774384231699911, "compression_loss": 7.133489608764648, "distillation_loss": 0.41012096405029297, "epoch": 2.07, "learning_rate": 4.406405560251715e-05, "loss": 7.5644, "step": 2447, "task_loss": 0.45452436804771423 }, { "compression/movement_sparsity/importance_regularization_factor": 0.06686394619376412, "compression/movement_sparsity/importance_threshold": -0.006535437471458518, "compression/movement_sparsity/linear_layer_sparsity": 0.003995848195615875, "compression/movement_sparsity/model_sparsity": 0.003858578517429593, "compression_loss": 7.220540523529053, "distillation_loss": 0.7610046863555908, "epoch": 2.07, "learning_rate": 4.405935944397483e-05, "loss": 7.7841, "step": 2448, "task_loss": 0.7570705413818359 }, { "compression/movement_sparsity/importance_regularization_factor": 0.06767022730163741, "compression/movement_sparsity/importance_threshold": -0.006529790492388927, "compression/movement_sparsity/linear_layer_sparsity": 0.004108030764734074, "compression/movement_sparsity/model_sparsity": 0.003966907270184633, "compression_loss": 7.307538986206055, "distillation_loss": 0.686468780040741, "epoch": 2.07, "learning_rate": 4.405466328543252e-05, "loss": 7.8301, "step": 2449, "task_loss": 0.8293174505233765 }, { "compression/movement_sparsity/importance_regularization_factor": 0.06847604382807271, "compression/movement_sparsity/importance_threshold": -0.00652414676712453, "compression/movement_sparsity/linear_layer_sparsity": 0.004233592249939712, "compression/movement_sparsity/model_sparsity": 0.004088155332101147, "compression_loss": 7.394484996795654, "distillation_loss": 0.5806015729904175, "epoch": 2.07, "learning_rate": 4.404996712689021e-05, "loss": 7.9085, "step": 2450, "task_loss": 0.9789434671401978 }, { "compression/movement_sparsity/importance_regularization_factor": 0.06928139590695526, "compression/movement_sparsity/importance_threshold": -0.006518506294727631, "compression/movement_sparsity/linear_layer_sparsity": 0.004365056198125101, "compression/movement_sparsity/model_sparsity": 0.00421510308923596, "compression_loss": 7.481378078460693, "distillation_loss": 0.2894788384437561, "epoch": 2.07, "learning_rate": 4.4045270968347894e-05, "loss": 7.7723, "step": 2451, "task_loss": 0.16470776498317719 }, { "compression/movement_sparsity/importance_regularization_factor": 0.07008628367217018, "compression/movement_sparsity/importance_threshold": -0.006512869074260532, "compression/movement_sparsity/linear_layer_sparsity": 0.004492322839302667, "compression/movement_sparsity/model_sparsity": 0.004337997729771094, "compression_loss": 7.568211078643799, "distillation_loss": 0.36692577600479126, "epoch": 2.07, "learning_rate": 4.404057480980558e-05, "loss": 8.0048, "step": 2452, "task_loss": 0.649534285068512 }, { "compression/movement_sparsity/importance_regularization_factor": 0.07089070725760216, "compression/movement_sparsity/importance_threshold": -0.0065072351047855415, "compression/movement_sparsity/linear_layer_sparsity": 0.00463599713514718, "compression/movement_sparsity/model_sparsity": 0.004476736371559516, "compression_loss": 7.654995441436768, "distillation_loss": 0.36742693185806274, "epoch": 2.07, "learning_rate": 4.4035878651263267e-05, "loss": 8.1637, "step": 2453, "task_loss": 1.0155671834945679 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0716946667971371, "compression/movement_sparsity/importance_threshold": -0.006501604385364958, "compression/movement_sparsity/linear_layer_sparsity": 0.004776797706591451, "compression/movement_sparsity/model_sparsity": 0.004612700010221454, "compression_loss": 7.741716384887695, "distillation_loss": 0.3914766311645508, "epoch": 2.07, "learning_rate": 4.403118249272096e-05, "loss": 8.2329, "step": 2454, "task_loss": 0.4086741507053375 }, { "compression/movement_sparsity/importance_regularization_factor": 0.07249816242465978, "compression/movement_sparsity/importance_threshold": -0.006495976915061088, "compression/movement_sparsity/linear_layer_sparsity": 0.004889755346605981, "compression/movement_sparsity/model_sparsity": 0.004721777207803138, "compression_loss": 7.828385829925537, "distillation_loss": 0.3016759157180786, "epoch": 2.08, "learning_rate": 4.4026486334178646e-05, "loss": 8.2195, "step": 2455, "task_loss": 0.37689492106437683 }, { "compression/movement_sparsity/importance_regularization_factor": 0.07330119427405535, "compression/movement_sparsity/importance_threshold": -0.006490352692936236, "compression/movement_sparsity/linear_layer_sparsity": 0.005030365131368078, "compression/movement_sparsity/model_sparsity": 0.004857556613892363, "compression_loss": 7.915004730224609, "distillation_loss": 0.4386562705039978, "epoch": 2.08, "learning_rate": 4.402179017563633e-05, "loss": 8.3814, "step": 2456, "task_loss": 0.7678865790367126 }, { "compression/movement_sparsity/importance_regularization_factor": 0.07410376247920902, "compression/movement_sparsity/importance_threshold": -0.006484731718052705, "compression/movement_sparsity/linear_layer_sparsity": 0.0052091203283973, "compression/movement_sparsity/model_sparsity": 0.005030171019988324, "compression_loss": 8.00157356262207, "distillation_loss": 0.5517943501472473, "epoch": 2.08, "learning_rate": 4.401709401709402e-05, "loss": 8.4303, "step": 2457, "task_loss": 0.8872720003128052 }, { "compression/movement_sparsity/importance_regularization_factor": 0.07490586717400582, "compression/movement_sparsity/importance_threshold": -0.006479113989472799, "compression/movement_sparsity/linear_layer_sparsity": 0.005423194909963949, "compression/movement_sparsity/model_sparsity": 0.005236891481107716, "compression_loss": 8.088088989257812, "distillation_loss": 0.36474883556365967, "epoch": 2.08, "learning_rate": 4.4012397858551705e-05, "loss": 8.5516, "step": 2458, "task_loss": 0.43918728828430176 }, { "compression/movement_sparsity/importance_regularization_factor": 0.07570750849233099, "compression/movement_sparsity/importance_threshold": -0.006473499506258822, "compression/movement_sparsity/linear_layer_sparsity": 0.005534805119035627, "compression/movement_sparsity/model_sparsity": 0.005344667536144617, "compression_loss": 8.174554824829102, "distillation_loss": 0.2895936369895935, "epoch": 2.08, "learning_rate": 4.40077017000094e-05, "loss": 8.6167, "step": 2459, "task_loss": 0.5829182863235474 }, { "compression/movement_sparsity/importance_regularization_factor": 0.07650868656806953, "compression/movement_sparsity/importance_threshold": -0.006467888267473079, "compression/movement_sparsity/linear_layer_sparsity": 0.005668940080771449, "compression/movement_sparsity/model_sparsity": 0.005474194549297407, "compression_loss": 8.260965347290039, "distillation_loss": 0.5662906169891357, "epoch": 2.08, "learning_rate": 4.400300554146708e-05, "loss": 8.8727, "step": 2460, "task_loss": 0.9015250205993652 }, { "compression/movement_sparsity/importance_regularization_factor": 0.07730940153510668, "compression/movement_sparsity/importance_threshold": -0.006462280272177873, "compression/movement_sparsity/linear_layer_sparsity": 0.005829785178011606, "compression/movement_sparsity/model_sparsity": 0.005629514122629967, "compression_loss": 8.347328186035156, "distillation_loss": 0.5136330723762512, "epoch": 2.08, "learning_rate": 4.399830938292477e-05, "loss": 8.8908, "step": 2461, "task_loss": 0.45607006549835205 }, { "compression/movement_sparsity/importance_regularization_factor": 0.07810965352732757, "compression/movement_sparsity/importance_threshold": -0.006456675519435509, "compression/movement_sparsity/linear_layer_sparsity": 0.006043657048728445, "compression/movement_sparsity/model_sparsity": 0.005836038836640851, "compression_loss": 8.433639526367188, "distillation_loss": 0.4142168462276459, "epoch": 2.08, "learning_rate": 4.399361322438246e-05, "loss": 9.1706, "step": 2462, "task_loss": 0.4710100591182709 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0789094426786171, "compression/movement_sparsity/importance_threshold": -0.006451074008308291, "compression/movement_sparsity/linear_layer_sparsity": 0.006269739267104408, "compression/movement_sparsity/model_sparsity": 0.006054354435305344, "compression_loss": 8.519902229309082, "distillation_loss": 0.4787374436855316, "epoch": 2.08, "learning_rate": 4.3988917065840143e-05, "loss": 9.1018, "step": 2463, "task_loss": 0.45049142837524414 }, { "compression/movement_sparsity/importance_regularization_factor": 0.07970876912286096, "compression/movement_sparsity/importance_threshold": -0.006445475737858521, "compression/movement_sparsity/linear_layer_sparsity": 0.006413592425463459, "compression/movement_sparsity/model_sparsity": 0.006193265795130685, "compression_loss": 8.606109619140625, "distillation_loss": 0.6395452618598938, "epoch": 2.08, "learning_rate": 4.398422090729784e-05, "loss": 9.0268, "step": 2464, "task_loss": 0.16611601412296295 }, { "compression/movement_sparsity/importance_regularization_factor": 0.08050763299394392, "compression/movement_sparsity/importance_threshold": -0.006439880707148503, "compression/movement_sparsity/linear_layer_sparsity": 0.006644444310893767, "compression/movement_sparsity/model_sparsity": 0.006416187208112994, "compression_loss": 8.692270278930664, "distillation_loss": 0.6031801700592041, "epoch": 2.08, "learning_rate": 4.3979524748755516e-05, "loss": 9.3505, "step": 2465, "task_loss": 0.15452896058559418 }, { "compression/movement_sparsity/importance_regularization_factor": 0.08130603442575124, "compression/movement_sparsity/importance_threshold": -0.006434288915240543, "compression/movement_sparsity/linear_layer_sparsity": 0.006847441340726699, "compression/movement_sparsity/model_sparsity": 0.006612210665479257, "compression_loss": 8.778373718261719, "distillation_loss": 0.7114195823669434, "epoch": 2.08, "learning_rate": 4.397482859021321e-05, "loss": 9.221, "step": 2466, "task_loss": 0.7676072120666504 }, { "compression/movement_sparsity/importance_regularization_factor": 0.08210397355216792, "compression/movement_sparsity/importance_threshold": -0.006428700361196945, "compression/movement_sparsity/linear_layer_sparsity": 0.007123879319028911, "compression/movement_sparsity/model_sparsity": 0.006879152148804096, "compression_loss": 8.864429473876953, "distillation_loss": 0.43020790815353394, "epoch": 2.09, "learning_rate": 4.3970132431670896e-05, "loss": 9.4159, "step": 2467, "task_loss": 0.6674639582633972 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0829014505070792, "compression/movement_sparsity/importance_threshold": -0.006423115044080011, "compression/movement_sparsity/linear_layer_sparsity": 0.007319829165789048, "compression/movement_sparsity/model_sparsity": 0.0070683705155157855, "compression_loss": 8.950435638427734, "distillation_loss": 0.4351471960544586, "epoch": 2.09, "learning_rate": 4.396543627312858e-05, "loss": 9.4226, "step": 2468, "task_loss": 1.3550218343734741 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0836984654243702, "compression/movement_sparsity/importance_threshold": -0.006417532962952046, "compression/movement_sparsity/linear_layer_sparsity": 0.0074594730929726395, "compression/movement_sparsity/model_sparsity": 0.007203217244205653, "compression_loss": 9.036383628845215, "distillation_loss": 0.3953562080860138, "epoch": 2.09, "learning_rate": 4.396074011458627e-05, "loss": 9.4771, "step": 2469, "task_loss": 0.12599721550941467 }, { "compression/movement_sparsity/importance_regularization_factor": 0.08449501843792617, "compression/movement_sparsity/importance_threshold": -0.006411954116875354, "compression/movement_sparsity/linear_layer_sparsity": 0.007694915782942755, "compression/movement_sparsity/model_sparsity": 0.007430571753468861, "compression_loss": 9.122285842895508, "distillation_loss": 0.7781376838684082, "epoch": 2.09, "learning_rate": 4.3956043956043955e-05, "loss": 9.7639, "step": 2470, "task_loss": 0.8043678998947144 }, { "compression/movement_sparsity/importance_regularization_factor": 0.085291109681632, "compression/movement_sparsity/importance_threshold": -0.00640637850491224, "compression/movement_sparsity/linear_layer_sparsity": 0.007765506855347064, "compression/movement_sparsity/model_sparsity": 0.007498737805372542, "compression_loss": 9.208134651184082, "distillation_loss": 0.3136378824710846, "epoch": 2.09, "learning_rate": 4.395134779750165e-05, "loss": 9.7037, "step": 2471, "task_loss": 0.9936558604240417 }, { "compression/movement_sparsity/importance_regularization_factor": 0.08608673928937305, "compression/movement_sparsity/importance_threshold": -0.0064008061261250065, "compression/movement_sparsity/linear_layer_sparsity": 0.007930728122109583, "compression/movement_sparsity/model_sparsity": 0.0076582832133416994, "compression_loss": 9.293932914733887, "distillation_loss": 0.9721150994300842, "epoch": 2.09, "learning_rate": 4.3946651638959334e-05, "loss": 10.092, "step": 2472, "task_loss": 0.48359057307243347 }, { "compression/movement_sparsity/importance_regularization_factor": 0.08688190739503443, "compression/movement_sparsity/importance_threshold": -0.006395236979575958, "compression/movement_sparsity/linear_layer_sparsity": 0.008300663498844596, "compression/movement_sparsity/model_sparsity": 0.008015510171831533, "compression_loss": 9.379682540893555, "distillation_loss": 0.5698330402374268, "epoch": 2.09, "learning_rate": 4.394195548041702e-05, "loss": 9.9619, "step": 2473, "task_loss": 1.0545095205307007 }, { "compression/movement_sparsity/importance_regularization_factor": 0.08767661413250083, "compression/movement_sparsity/importance_threshold": -0.006389671064327401, "compression/movement_sparsity/linear_layer_sparsity": 0.008496219848072748, "compression/movement_sparsity/model_sparsity": 0.008204348558862002, "compression_loss": 9.465387344360352, "distillation_loss": 0.42641210556030273, "epoch": 2.09, "learning_rate": 4.393725932187471e-05, "loss": 10.0066, "step": 2474, "task_loss": 0.5793523192405701 }, { "compression/movement_sparsity/importance_regularization_factor": 0.08847085963565826, "compression/movement_sparsity/importance_threshold": -0.006384108379441634, "compression/movement_sparsity/linear_layer_sparsity": 0.008688926321235932, "compression/movement_sparsity/model_sparsity": 0.008390434971837576, "compression_loss": 9.551039695739746, "distillation_loss": 0.5268167853355408, "epoch": 2.09, "learning_rate": 4.393256316333239e-05, "loss": 10.2528, "step": 2475, "task_loss": 0.6399965286254883 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0892646440383913, "compression/movement_sparsity/importance_threshold": -0.006378548923980964, "compression/movement_sparsity/linear_layer_sparsity": 0.00892911482992512, "compression/movement_sparsity/model_sparsity": 0.00862237226634701, "compression_loss": 9.636655807495117, "distillation_loss": 0.6597421169281006, "epoch": 2.09, "learning_rate": 4.3927867004790086e-05, "loss": 10.2403, "step": 2476, "task_loss": 0.44541287422180176 }, { "compression/movement_sparsity/importance_regularization_factor": 0.09005796747458517, "compression/movement_sparsity/importance_threshold": -0.0063729926970076956, "compression/movement_sparsity/linear_layer_sparsity": 0.009152335248068475, "compression/movement_sparsity/model_sparsity": 0.008837924376420815, "compression_loss": 9.722213745117188, "distillation_loss": 0.7705314755439758, "epoch": 2.09, "learning_rate": 4.3923170846247766e-05, "loss": 10.3992, "step": 2477, "task_loss": 0.7143381834030151 }, { "compression/movement_sparsity/importance_regularization_factor": 0.090850830078125, "compression/movement_sparsity/importance_threshold": -0.006367439697584132, "compression/movement_sparsity/linear_layer_sparsity": 0.009376688462137238, "compression/movement_sparsity/model_sparsity": 0.0090545703673951, "compression_loss": 9.807727813720703, "distillation_loss": 0.3771783411502838, "epoch": 2.09, "learning_rate": 4.391847468770546e-05, "loss": 10.3228, "step": 2478, "task_loss": 0.9188374280929565 }, { "compression/movement_sparsity/importance_regularization_factor": 0.09164323198289603, "compression/movement_sparsity/importance_threshold": -0.006361889924772577, "compression/movement_sparsity/linear_layer_sparsity": 0.009572065948850853, "compression/movement_sparsity/model_sparsity": 0.009243236036388651, "compression_loss": 9.8931884765625, "distillation_loss": 0.5092464685440063, "epoch": 2.1, "learning_rate": 4.3913778529163145e-05, "loss": 10.4778, "step": 2479, "task_loss": 0.5467199087142944 }, { "compression/movement_sparsity/importance_regularization_factor": 0.09243517332278328, "compression/movement_sparsity/importance_threshold": -0.006356343377635335, "compression/movement_sparsity/linear_layer_sparsity": 0.00977375132024384, "compression/movement_sparsity/model_sparsity": 0.009437992894817513, "compression_loss": 9.978598594665527, "distillation_loss": 0.44155019521713257, "epoch": 2.1, "learning_rate": 4.390908237062084e-05, "loss": 10.479, "step": 2480, "task_loss": 1.1968857049942017 }, { "compression/movement_sparsity/importance_regularization_factor": 0.09322665423167198, "compression/movement_sparsity/importance_threshold": -0.006350800055234709, "compression/movement_sparsity/linear_layer_sparsity": 0.009949453930358282, "compression/movement_sparsity/model_sparsity": 0.009607659579750072, "compression_loss": 10.063962936401367, "distillation_loss": 0.5727531313896179, "epoch": 2.1, "learning_rate": 4.3904386212078525e-05, "loss": 10.6191, "step": 2481, "task_loss": 0.5607504844665527 }, { "compression/movement_sparsity/importance_regularization_factor": 0.09401767484344714, "compression/movement_sparsity/importance_threshold": -0.006345259956633005, "compression/movement_sparsity/linear_layer_sparsity": 0.010137581523149293, "compression/movement_sparsity/model_sparsity": 0.009789324410980542, "compression_loss": 10.149272918701172, "distillation_loss": 0.5028717517852783, "epoch": 2.1, "learning_rate": 4.389969005353621e-05, "loss": 10.7672, "step": 2482, "task_loss": 1.2020835876464844 }, { "compression/movement_sparsity/importance_regularization_factor": 0.09480823529199378, "compression/movement_sparsity/importance_threshold": -0.006339723080892528, "compression/movement_sparsity/linear_layer_sparsity": 0.010396288264176977, "compression/movement_sparsity/model_sparsity": 0.0100391437795789, "compression_loss": 10.234527587890625, "distillation_loss": 0.5130756497383118, "epoch": 2.1, "learning_rate": 4.38949938949939e-05, "loss": 10.661, "step": 2483, "task_loss": 0.3096490502357483 }, { "compression/movement_sparsity/importance_regularization_factor": 0.09559833571119758, "compression/movement_sparsity/importance_threshold": -0.006334189427075577, "compression/movement_sparsity/linear_layer_sparsity": 0.010668159286274653, "compression/movement_sparsity/model_sparsity": 0.010301675195694429, "compression_loss": 10.31973648071289, "distillation_loss": 0.42230138182640076, "epoch": 2.1, "learning_rate": 4.3890297736451584e-05, "loss": 10.7504, "step": 2484, "task_loss": 0.5389121770858765 }, { "compression/movement_sparsity/importance_regularization_factor": 0.09638797623494333, "compression/movement_sparsity/importance_threshold": -0.0063286589942444585, "compression/movement_sparsity/linear_layer_sparsity": 0.010863536772988269, "compression/movement_sparsity/model_sparsity": 0.01049034086468798, "compression_loss": 10.404875755310059, "distillation_loss": 0.384593665599823, "epoch": 2.1, "learning_rate": 4.388560157790928e-05, "loss": 10.9139, "step": 2485, "task_loss": 0.30024608969688416 }, { "compression/movement_sparsity/importance_regularization_factor": 0.09717715699711615, "compression/movement_sparsity/importance_threshold": -0.006323131781461477, "compression/movement_sparsity/linear_layer_sparsity": 0.011040586814045563, "compression/movement_sparsity/model_sparsity": 0.010661308692165323, "compression_loss": 10.489977836608887, "distillation_loss": 0.5715488791465759, "epoch": 2.1, "learning_rate": 4.3880905419366956e-05, "loss": 11.0848, "step": 2486, "task_loss": 1.1164438724517822 }, { "compression/movement_sparsity/importance_regularization_factor": 0.09796587813160129, "compression/movement_sparsity/importance_threshold": -0.006317607787788937, "compression/movement_sparsity/linear_layer_sparsity": 0.011213057974730685, "compression/movement_sparsity/model_sparsity": 0.01082785493789756, "compression_loss": 10.575029373168945, "distillation_loss": 0.6510967016220093, "epoch": 2.1, "learning_rate": 4.387620926082465e-05, "loss": 11.1015, "step": 2487, "task_loss": 0.9710118770599365 }, { "compression/movement_sparsity/importance_regularization_factor": 0.09875413977228376, "compression/movement_sparsity/importance_threshold": -0.006312087012289141, "compression/movement_sparsity/linear_layer_sparsity": 0.011445436153618383, "compression/movement_sparsity/model_sparsity": 0.011052250211461572, "compression_loss": 10.660024642944336, "distillation_loss": 1.3057067394256592, "epoch": 2.1, "learning_rate": 4.3871513102282336e-05, "loss": 11.4003, "step": 2488, "task_loss": 2.293001413345337 }, { "compression/movement_sparsity/importance_regularization_factor": 0.09954194205304878, "compression/movement_sparsity/importance_threshold": -0.006306569454024393, "compression/movement_sparsity/linear_layer_sparsity": 0.011594070903199416, "compression/movement_sparsity/model_sparsity": 0.011195778900140523, "compression_loss": 10.744974136352539, "distillation_loss": 0.7082593441009521, "epoch": 2.1, "learning_rate": 4.386681694374002e-05, "loss": 11.324, "step": 2489, "task_loss": 2.051161527633667 }, { "compression/movement_sparsity/importance_regularization_factor": 0.1003292851077815, "compression/movement_sparsity/importance_threshold": -0.006301055112056999, "compression/movement_sparsity/linear_layer_sparsity": 0.0118046994003193, "compression/movement_sparsity/model_sparsity": 0.011399171660415292, "compression_loss": 10.829861640930176, "distillation_loss": 0.41983428597450256, "epoch": 2.1, "learning_rate": 4.386212078519771e-05, "loss": 11.2578, "step": 2490, "task_loss": 0.16304424405097961 }, { "compression/movement_sparsity/importance_regularization_factor": 0.10111616907036702, "compression/movement_sparsity/importance_threshold": -0.006295543985449261, "compression/movement_sparsity/linear_layer_sparsity": 0.012079634933499395, "compression/movement_sparsity/model_sparsity": 0.011664662312230017, "compression_loss": 10.914708137512207, "distillation_loss": 0.3453892767429352, "epoch": 2.11, "learning_rate": 4.3857424626655395e-05, "loss": 11.4853, "step": 2491, "task_loss": 0.9453598856925964 }, { "compression/movement_sparsity/importance_regularization_factor": 0.10190259407469049, "compression/movement_sparsity/importance_threshold": -0.0062900360732634835, "compression/movement_sparsity/linear_layer_sparsity": 0.012377655655222518, "compression/movement_sparsity/model_sparsity": 0.011952445105342976, "compression_loss": 10.999489784240723, "distillation_loss": 0.6023022532463074, "epoch": 2.11, "learning_rate": 4.385272846811309e-05, "loss": 11.6984, "step": 2492, "task_loss": 1.7473595142364502 }, { "compression/movement_sparsity/importance_regularization_factor": 0.10268856025463713, "compression/movement_sparsity/importance_threshold": -0.00628453137456197, "compression/movement_sparsity/linear_layer_sparsity": 0.012579507964962407, "compression/movement_sparsity/model_sparsity": 0.012147363167272962, "compression_loss": 11.084223747253418, "distillation_loss": 0.506463348865509, "epoch": 2.11, "learning_rate": 4.3848032309570774e-05, "loss": 11.6328, "step": 2493, "task_loss": 0.31890761852264404 }, { "compression/movement_sparsity/importance_regularization_factor": 0.10347406774409162, "compression/movement_sparsity/importance_threshold": -0.006279029888407028, "compression/movement_sparsity/linear_layer_sparsity": 0.012825813571648794, "compression/movement_sparsity/model_sparsity": 0.012385207418644998, "compression_loss": 11.168905258178711, "distillation_loss": 0.4475681781768799, "epoch": 2.11, "learning_rate": 4.384333615102846e-05, "loss": 11.7254, "step": 2494, "task_loss": 0.8881322741508484 }, { "compression/movement_sparsity/importance_regularization_factor": 0.10425911667693977, "compression/movement_sparsity/importance_threshold": -0.006273531613860957, "compression/movement_sparsity/linear_layer_sparsity": 0.013013547666907821, "compression/movement_sparsity/model_sparsity": 0.012566492270194247, "compression_loss": 11.253530502319336, "distillation_loss": 0.37442925572395325, "epoch": 2.11, "learning_rate": 4.383863999248615e-05, "loss": 11.8431, "step": 2495, "task_loss": 0.49670886993408203 }, { "compression/movement_sparsity/importance_regularization_factor": 0.10504370718706657, "compression/movement_sparsity/importance_threshold": -0.006268036549986061, "compression/movement_sparsity/linear_layer_sparsity": 0.013253378450567933, "compression/movement_sparsity/model_sparsity": 0.012798084128629846, "compression_loss": 11.338117599487305, "distillation_loss": 0.5494994521141052, "epoch": 2.11, "learning_rate": 4.383394383394383e-05, "loss": 11.9346, "step": 2496, "task_loss": 0.31456267833709717 }, { "compression/movement_sparsity/importance_regularization_factor": 0.10582783940835694, "compression/movement_sparsity/importance_threshold": -0.006262544695844647, "compression/movement_sparsity/linear_layer_sparsity": 0.013474869864404089, "compression/movement_sparsity/model_sparsity": 0.013011966631013443, "compression_loss": 11.422650337219238, "distillation_loss": 1.0991010665893555, "epoch": 2.11, "learning_rate": 4.3829247675401526e-05, "loss": 12.0807, "step": 2497, "task_loss": 1.6813899278640747 }, { "compression/movement_sparsity/importance_regularization_factor": 0.1066115134746961, "compression/movement_sparsity/importance_threshold": -0.006257056050499016, "compression/movement_sparsity/linear_layer_sparsity": 0.013746740886501766, "compression/movement_sparsity/model_sparsity": 0.013274498047128972, "compression_loss": 11.507121086120605, "distillation_loss": 0.5585469007492065, "epoch": 2.11, "learning_rate": 4.382455151685921e-05, "loss": 11.9785, "step": 2498, "task_loss": 0.7304984927177429 }, { "compression/movement_sparsity/importance_regularization_factor": 0.1073947295199692, "compression/movement_sparsity/importance_threshold": -0.006251570613011475, "compression/movement_sparsity/linear_layer_sparsity": 0.014023190788971613, "compression/movement_sparsity/model_sparsity": 0.013541451044989605, "compression_loss": 11.591545104980469, "distillation_loss": 0.45181185007095337, "epoch": 2.11, "learning_rate": 4.38198553583169e-05, "loss": 11.9796, "step": 2499, "task_loss": 0.9213634133338928 }, { "compression/movement_sparsity/importance_regularization_factor": 0.10817748767806146, "compression/movement_sparsity/importance_threshold": -0.006246088382444325, "compression/movement_sparsity/linear_layer_sparsity": 0.014224637677011884, "compression/movement_sparsity/model_sparsity": 0.013735977612702578, "compression_loss": 11.675919532775879, "distillation_loss": 0.541807234287262, "epoch": 2.11, "learning_rate": 4.3815159199774585e-05, "loss": 12.263, "step": 2500, "task_loss": 0.557203471660614 }, { "epoch": 2.11, "eval_accuracy": 0.9057425742574258, "eval_loss": 12.00404167175293, "eval_runtime": 228.1482, "eval_samples_per_second": 110.674, "eval_steps_per_second": 0.868, "step": 2500 }, { "compression/movement_sparsity/importance_regularization_factor": 0.10895978808285789, "compression/movement_sparsity/importance_threshold": -0.006240609357859872, "compression/movement_sparsity/linear_layer_sparsity": 0.01456655125980262, "compression/movement_sparsity/model_sparsity": 0.01406614541207524, "compression_loss": 11.760249137878418, "distillation_loss": 0.23940984904766083, "epoch": 2.11, "learning_rate": 4.381046304123227e-05, "loss": 12.2371, "step": 2501, "task_loss": 0.22043900191783905 }, { "compression/movement_sparsity/importance_regularization_factor": 0.10974163086824362, "compression/movement_sparsity/importance_threshold": -0.00623513353832042, "compression/movement_sparsity/linear_layer_sparsity": 0.014817066097664465, "compression/movement_sparsity/model_sparsity": 0.014308054294582748, "compression_loss": 11.844521522521973, "distillation_loss": 0.5588322877883911, "epoch": 2.11, "learning_rate": 4.3805766882689965e-05, "loss": 12.4128, "step": 2502, "task_loss": 0.40949487686157227 }, { "compression/movement_sparsity/importance_regularization_factor": 0.110523016168104, "compression/movement_sparsity/importance_threshold": -0.006229660922888272, "compression/movement_sparsity/linear_layer_sparsity": 0.015096389724534557, "compression/movement_sparsity/model_sparsity": 0.014577782295569866, "compression_loss": 11.928736686706543, "distillation_loss": 0.5555133819580078, "epoch": 2.12, "learning_rate": 4.3801070724147645e-05, "loss": 12.4085, "step": 2503, "task_loss": 0.6480487585067749 }, { "compression/movement_sparsity/importance_regularization_factor": 0.1113039441163236, "compression/movement_sparsity/importance_threshold": -0.006224191510625734, "compression/movement_sparsity/linear_layer_sparsity": 0.015337544090802251, "compression/movement_sparsity/model_sparsity": 0.01481065226747866, "compression_loss": 12.012903213500977, "distillation_loss": 0.6601122617721558, "epoch": 2.12, "learning_rate": 4.379637456560534e-05, "loss": 12.6061, "step": 2504, "task_loss": 1.199681282043457 }, { "compression/movement_sparsity/importance_regularization_factor": 0.11208441484678833, "compression/movement_sparsity/importance_threshold": -0.0062187253005951066, "compression/movement_sparsity/linear_layer_sparsity": 0.015536355737794995, "compression/movement_sparsity/model_sparsity": 0.015002634122781038, "compression_loss": 12.097017288208008, "distillation_loss": 0.5549905896186829, "epoch": 2.12, "learning_rate": 4.3791678407063024e-05, "loss": 12.6452, "step": 2505, "task_loss": 0.6281686425209045 }, { "compression/movement_sparsity/importance_regularization_factor": 0.11286442849338296, "compression/movement_sparsity/importance_threshold": -0.006213262291858696, "compression/movement_sparsity/linear_layer_sparsity": 0.01579906899914833, "compression/movement_sparsity/model_sparsity": 0.015256322375406361, "compression_loss": 12.181082725524902, "distillation_loss": 0.7515906691551208, "epoch": 2.12, "learning_rate": 4.378698224852072e-05, "loss": 12.744, "step": 2506, "task_loss": 0.7625669836997986 }, { "compression/movement_sparsity/importance_regularization_factor": 0.11364398518999275, "compression/movement_sparsity/importance_threshold": -0.006207802483478804, "compression/movement_sparsity/linear_layer_sparsity": 0.016079704284458363, "compression/movement_sparsity/model_sparsity": 0.015527316975330878, "compression_loss": 12.265105247497559, "distillation_loss": 0.48221731185913086, "epoch": 2.12, "learning_rate": 4.37822860899784e-05, "loss": 12.8517, "step": 2507, "task_loss": 0.27545469999313354 }, { "compression/movement_sparsity/importance_regularization_factor": 0.1144230850705027, "compression/movement_sparsity/importance_threshold": -0.0062023458745177375, "compression/movement_sparsity/linear_layer_sparsity": 0.016428092690275373, "compression/movement_sparsity/model_sparsity": 0.015863737167639978, "compression_loss": 12.349076271057129, "distillation_loss": 0.7552988529205322, "epoch": 2.12, "learning_rate": 4.377758993143608e-05, "loss": 12.9785, "step": 2508, "task_loss": 0.7426744699478149 }, { "compression/movement_sparsity/importance_regularization_factor": 0.11520172826879793, "compression/movement_sparsity/importance_threshold": -0.006196892464037799, "compression/movement_sparsity/linear_layer_sparsity": 0.016653984121969164, "compression/movement_sparsity/model_sparsity": 0.01608186853373176, "compression_loss": 12.432995796203613, "distillation_loss": 0.7691762447357178, "epoch": 2.12, "learning_rate": 4.3772893772893776e-05, "loss": 12.9973, "step": 2509, "task_loss": 0.7293866872787476 }, { "compression/movement_sparsity/importance_regularization_factor": 0.1159799149187637, "compression/movement_sparsity/importance_threshold": -0.006191442251101293, "compression/movement_sparsity/linear_layer_sparsity": 0.016889224101089468, "compression/movement_sparsity/model_sparsity": 0.016309027295886458, "compression_loss": 12.516862869262695, "distillation_loss": 0.3465961813926697, "epoch": 2.12, "learning_rate": 4.376819761435146e-05, "loss": 12.8938, "step": 2510, "task_loss": 0.1730797290802002 }, { "compression/movement_sparsity/importance_regularization_factor": 0.1167576451542851, "compression/movement_sparsity/importance_threshold": -0.006185995234770523, "compression/movement_sparsity/linear_layer_sparsity": 0.01728646582171061, "compression/movement_sparsity/model_sparsity": 0.016692622541345792, "compression_loss": 12.600663185119629, "distillation_loss": 0.7592371106147766, "epoch": 2.12, "learning_rate": 4.3763501455809156e-05, "loss": 13.2876, "step": 2511, "task_loss": 1.2177231311798096 }, { "compression/movement_sparsity/importance_regularization_factor": 0.1175349191092473, "compression/movement_sparsity/importance_threshold": -0.0061805514141077935, "compression/movement_sparsity/linear_layer_sparsity": 0.017675682577512813, "compression/movement_sparsity/model_sparsity": 0.0170684685042154, "compression_loss": 12.684412002563477, "distillation_loss": 0.5324851274490356, "epoch": 2.12, "learning_rate": 4.3758805297266835e-05, "loss": 13.2559, "step": 2512, "task_loss": 0.3248615860939026 }, { "compression/movement_sparsity/importance_regularization_factor": 0.11831173691753538, "compression/movement_sparsity/importance_threshold": -0.006175110788175408, "compression/movement_sparsity/linear_layer_sparsity": 0.017997933207872012, "compression/movement_sparsity/model_sparsity": 0.01737964883406286, "compression_loss": 12.76811408996582, "distillation_loss": 0.1859724223613739, "epoch": 2.12, "learning_rate": 4.375410913872453e-05, "loss": 13.1678, "step": 2513, "task_loss": 0.09421936422586441 }, { "compression/movement_sparsity/importance_regularization_factor": 0.11908809871303416, "compression/movement_sparsity/importance_threshold": -0.006169673356035673, "compression/movement_sparsity/linear_layer_sparsity": 0.018382547234966774, "compression/movement_sparsity/model_sparsity": 0.017751050186115776, "compression_loss": 12.851753234863281, "distillation_loss": 0.5571901202201843, "epoch": 2.13, "learning_rate": 4.3749412980182215e-05, "loss": 13.3425, "step": 2514, "task_loss": 1.1220035552978516 }, { "compression/movement_sparsity/importance_regularization_factor": 0.11986400462962954, "compression/movement_sparsity/importance_threshold": -0.006164239116750887, "compression/movement_sparsity/linear_layer_sparsity": 0.0186557656880073, "compression/movement_sparsity/model_sparsity": 0.018014882744776087, "compression_loss": 12.935338020324707, "distillation_loss": 0.5262259840965271, "epoch": 2.13, "learning_rate": 4.37447168216399e-05, "loss": 13.4572, "step": 2515, "task_loss": 0.5436557531356812 }, { "compression/movement_sparsity/importance_regularization_factor": 0.12063945480120619, "compression/movement_sparsity/importance_threshold": -0.0061588080693833576, "compression/movement_sparsity/linear_layer_sparsity": 0.018965603259857567, "compression/movement_sparsity/model_sparsity": 0.018314076442861437, "compression_loss": 13.018871307373047, "distillation_loss": 0.9457110166549683, "epoch": 2.13, "learning_rate": 4.374002066309759e-05, "loss": 13.5598, "step": 2516, "task_loss": 1.0214468240737915 }, { "compression/movement_sparsity/importance_regularization_factor": 0.12141444936164936, "compression/movement_sparsity/importance_threshold": -0.006153380212995388, "compression/movement_sparsity/linear_layer_sparsity": 0.019454017166222515, "compression/movement_sparsity/model_sparsity": 0.018785711829005826, "compression_loss": 13.102352142333984, "distillation_loss": 0.7864676117897034, "epoch": 2.13, "learning_rate": 4.3735324504555274e-05, "loss": 13.635, "step": 2517, "task_loss": 1.5607138872146606 }, { "compression/movement_sparsity/importance_regularization_factor": 0.12218898844484405, "compression/movement_sparsity/importance_threshold": -0.006147955546649283, "compression/movement_sparsity/linear_layer_sparsity": 0.019872436208564947, "compression/movement_sparsity/model_sparsity": 0.019189756890036262, "compression_loss": 13.185774803161621, "distillation_loss": 0.3986533582210541, "epoch": 2.13, "learning_rate": 4.373062834601297e-05, "loss": 13.6547, "step": 2518, "task_loss": 0.4902108311653137 }, { "compression/movement_sparsity/importance_regularization_factor": 0.12296307218467561, "compression/movement_sparsity/importance_threshold": -0.006142534069407346, "compression/movement_sparsity/linear_layer_sparsity": 0.02015060319117436, "compression/movement_sparsity/model_sparsity": 0.01945836798105131, "compression_loss": 13.269147872924805, "distillation_loss": 0.5591965317726135, "epoch": 2.13, "learning_rate": 4.372593218747065e-05, "loss": 13.8165, "step": 2519, "task_loss": 0.47914987802505493 }, { "compression/movement_sparsity/importance_regularization_factor": 0.12373670071502896, "compression/movement_sparsity/importance_threshold": -0.006137115780331881, "compression/movement_sparsity/linear_layer_sparsity": 0.020604103134701502, "compression/movement_sparsity/model_sparsity": 0.019896288806389287, "compression_loss": 13.352471351623535, "distillation_loss": 0.39664262533187866, "epoch": 2.13, "learning_rate": 4.372123602892834e-05, "loss": 13.9905, "step": 2520, "task_loss": 0.36817216873168945 }, { "compression/movement_sparsity/importance_regularization_factor": 0.12450987416978943, "compression/movement_sparsity/importance_threshold": -0.006131700678485191, "compression/movement_sparsity/linear_layer_sparsity": 0.020872730783202224, "compression/movement_sparsity/model_sparsity": 0.020155688268768703, "compression_loss": 13.435741424560547, "distillation_loss": 0.44774866104125977, "epoch": 2.13, "learning_rate": 4.3716539870386026e-05, "loss": 14.0686, "step": 2521, "task_loss": 0.8100059032440186 }, { "compression/movement_sparsity/importance_regularization_factor": 0.12528259268284203, "compression/movement_sparsity/importance_threshold": -0.006126288762929582, "compression/movement_sparsity/linear_layer_sparsity": 0.02121614681111508, "compression/movement_sparsity/model_sparsity": 0.02048730689965148, "compression_loss": 13.518967628479004, "distillation_loss": 0.4800080358982086, "epoch": 2.13, "learning_rate": 4.371184371184371e-05, "loss": 14.028, "step": 2522, "task_loss": 0.7068597078323364 }, { "compression/movement_sparsity/importance_regularization_factor": 0.12605485638807157, "compression/movement_sparsity/importance_threshold": -0.0061208800327273594, "compression/movement_sparsity/linear_layer_sparsity": 0.021501575611814733, "compression/movement_sparsity/model_sparsity": 0.0207629303429654, "compression_loss": 13.602137565612793, "distillation_loss": 0.4840482771396637, "epoch": 2.13, "learning_rate": 4.3707147553301405e-05, "loss": 14.045, "step": 2523, "task_loss": 0.6037019491195679 }, { "compression/movement_sparsity/importance_regularization_factor": 0.12682666541936405, "compression/movement_sparsity/importance_threshold": -0.006115474486940822, "compression/movement_sparsity/linear_layer_sparsity": 0.021842141763662615, "compression/movement_sparsity/model_sparsity": 0.02109179699979328, "compression_loss": 13.685258865356445, "distillation_loss": 0.4864698648452759, "epoch": 2.13, "learning_rate": 4.370245139475909e-05, "loss": 14.1694, "step": 2524, "task_loss": 0.7226937413215637 }, { "compression/movement_sparsity/importance_regularization_factor": 0.12759801991060393, "compression/movement_sparsity/importance_threshold": -0.006110072124632277, "compression/movement_sparsity/linear_layer_sparsity": 0.02225289356621519, "compression/movement_sparsity/model_sparsity": 0.021488438214307827, "compression_loss": 13.76832389831543, "distillation_loss": 0.89805006980896, "epoch": 2.13, "learning_rate": 4.369775523621678e-05, "loss": 14.3239, "step": 2525, "task_loss": 1.6137539148330688 }, { "compression/movement_sparsity/importance_regularization_factor": 0.12836891999567668, "compression/movement_sparsity/importance_threshold": -0.0061046729448640275, "compression/movement_sparsity/linear_layer_sparsity": 0.02256770351596961, "compression/movement_sparsity/model_sparsity": 0.021792433473819497, "compression_loss": 13.851346969604492, "distillation_loss": 0.6718326807022095, "epoch": 2.14, "learning_rate": 4.3693059077674464e-05, "loss": 14.3839, "step": 2526, "task_loss": 0.7194912433624268 }, { "compression/movement_sparsity/importance_regularization_factor": 0.1291393658084673, "compression/movement_sparsity/importance_threshold": -0.0060992769466983775, "compression/movement_sparsity/linear_layer_sparsity": 0.022908055032800047, "compression/movement_sparsity/model_sparsity": 0.022121092869003076, "compression_loss": 13.934311866760254, "distillation_loss": 0.5152491331100464, "epoch": 2.14, "learning_rate": 4.368836291913215e-05, "loss": 14.4245, "step": 2527, "task_loss": 0.9203323125839233 }, { "compression/movement_sparsity/importance_regularization_factor": 0.12990935748286092, "compression/movement_sparsity/importance_threshold": -0.006093884129197632, "compression/movement_sparsity/linear_layer_sparsity": 0.02323011487647707, "compression/movement_sparsity/model_sparsity": 0.022432088966277828, "compression_loss": 14.017224311828613, "distillation_loss": 0.655097484588623, "epoch": 2.14, "learning_rate": 4.3683666760589844e-05, "loss": 14.5106, "step": 2528, "task_loss": 0.7409726977348328 }, { "compression/movement_sparsity/importance_regularization_factor": 0.13067889515274267, "compression/movement_sparsity/importance_threshold": -0.006088494491424094, "compression/movement_sparsity/linear_layer_sparsity": 0.02372025778714922, "compression/movement_sparsity/model_sparsity": 0.022905393960112427, "compression_loss": 14.100090026855469, "distillation_loss": 0.43207496404647827, "epoch": 2.14, "learning_rate": 4.367897060204752e-05, "loss": 14.701, "step": 2529, "task_loss": 0.570621132850647 }, { "compression/movement_sparsity/importance_regularization_factor": 0.13144797895199778, "compression/movement_sparsity/importance_threshold": -0.006083108032440067, "compression/movement_sparsity/linear_layer_sparsity": 0.024085435420997524, "compression/movement_sparsity/model_sparsity": 0.02325802661882024, "compression_loss": 14.182893753051758, "distillation_loss": 0.621110737323761, "epoch": 2.14, "learning_rate": 4.3674274443505216e-05, "loss": 14.6946, "step": 2530, "task_loss": 1.1413514614105225 }, { "compression/movement_sparsity/importance_regularization_factor": 0.13221660901451127, "compression/movement_sparsity/importance_threshold": -0.006077724751307858, "compression/movement_sparsity/linear_layer_sparsity": 0.024496020285203195, "compression/movement_sparsity/model_sparsity": 0.023654506629833663, "compression_loss": 14.265647888183594, "distillation_loss": 0.7209075689315796, "epoch": 2.14, "learning_rate": 4.36695782849629e-05, "loss": 14.8499, "step": 2531, "task_loss": 1.5845314264297485 }, { "compression/movement_sparsity/importance_regularization_factor": 0.13298478547416837, "compression/movement_sparsity/importance_threshold": -0.006072344647089767, "compression/movement_sparsity/linear_layer_sparsity": 0.024894561740096646, "compression/movement_sparsity/model_sparsity": 0.0240393569596946, "compression_loss": 14.348350524902344, "distillation_loss": 0.879779577255249, "epoch": 2.14, "learning_rate": 4.366488212642059e-05, "loss": 14.8737, "step": 2532, "task_loss": 0.9149445295333862 }, { "compression/movement_sparsity/importance_regularization_factor": 0.1337525084648542, "compression/movement_sparsity/importance_threshold": -0.0060669677188481, "compression/movement_sparsity/linear_layer_sparsity": 0.02528680723447836, "compression/movement_sparsity/model_sparsity": 0.02441812761465602, "compression_loss": 14.43100357055664, "distillation_loss": 0.8005964756011963, "epoch": 2.14, "learning_rate": 4.3660185967878275e-05, "loss": 14.9526, "step": 2533, "task_loss": 2.118946075439453 }, { "compression/movement_sparsity/importance_regularization_factor": 0.13451977812045357, "compression/movement_sparsity/importance_threshold": -0.006061593965645163, "compression/movement_sparsity/linear_layer_sparsity": 0.02565349923761642, "compression/movement_sparsity/model_sparsity": 0.02477222261940974, "compression_loss": 14.513608932495117, "distillation_loss": 0.4513401985168457, "epoch": 2.14, "learning_rate": 4.365548980933596e-05, "loss": 15.0254, "step": 2534, "task_loss": 1.3878161907196045 }, { "compression/movement_sparsity/importance_regularization_factor": 0.13528659457485237, "compression/movement_sparsity/importance_threshold": -0.006056223386543255, "compression/movement_sparsity/linear_layer_sparsity": 0.02606561039527948, "compression/movement_sparsity/model_sparsity": 0.02517017649100486, "compression_loss": 14.596161842346191, "distillation_loss": 0.36565977334976196, "epoch": 2.14, "learning_rate": 4.3650793650793655e-05, "loss": 15.1483, "step": 2535, "task_loss": 0.2199409157037735 }, { "compression/movement_sparsity/importance_regularization_factor": 0.13605295796193517, "compression/movement_sparsity/importance_threshold": -0.006050855980604683, "compression/movement_sparsity/linear_layer_sparsity": 0.02647772155294254, "compression/movement_sparsity/model_sparsity": 0.025568130362599986, "compression_loss": 14.67866325378418, "distillation_loss": 0.6575306057929993, "epoch": 2.14, "learning_rate": 4.364609749225134e-05, "loss": 15.1849, "step": 2536, "task_loss": 0.9486361145973206 }, { "compression/movement_sparsity/importance_regularization_factor": 0.13681886841558732, "compression/movement_sparsity/importance_threshold": -0.006045491746891751, "compression/movement_sparsity/linear_layer_sparsity": 0.026931221496469683, "compression/movement_sparsity/model_sparsity": 0.026006051187937963, "compression_loss": 14.761101722717285, "distillation_loss": 0.49703025817871094, "epoch": 2.14, "learning_rate": 4.364140133370903e-05, "loss": 15.3274, "step": 2537, "task_loss": 0.5364235043525696 }, { "compression/movement_sparsity/importance_regularization_factor": 0.13758432606969384, "compression/movement_sparsity/importance_threshold": -0.006040130684466762, "compression/movement_sparsity/linear_layer_sparsity": 0.02735706929524926, "compression/movement_sparsity/model_sparsity": 0.026417269804768398, "compression_loss": 14.843487739562988, "distillation_loss": 0.5222068428993225, "epoch": 2.15, "learning_rate": 4.3636705175166714e-05, "loss": 15.4, "step": 2538, "task_loss": 0.212115079164505 }, { "compression/movement_sparsity/importance_regularization_factor": 0.13834933105813996, "compression/movement_sparsity/importance_threshold": -0.006034772792392022, "compression/movement_sparsity/linear_layer_sparsity": 0.02788231695544139, "compression/movement_sparsity/model_sparsity": 0.026924473591982124, "compression_loss": 14.925821304321289, "distillation_loss": 0.4889252781867981, "epoch": 2.15, "learning_rate": 4.36320090166244e-05, "loss": 15.51, "step": 2539, "task_loss": 0.5091556906700134 }, { "compression/movement_sparsity/importance_regularization_factor": 0.1391138835148108, "compression/movement_sparsity/importance_threshold": -0.006029418069729832, "compression/movement_sparsity/linear_layer_sparsity": 0.028337164329911384, "compression/movement_sparsity/model_sparsity": 0.027363695559864884, "compression_loss": 15.008112907409668, "distillation_loss": 0.357362300157547, "epoch": 2.15, "learning_rate": 4.362731285808209e-05, "loss": 15.4813, "step": 2540, "task_loss": 0.11102087050676346 }, { "compression/movement_sparsity/importance_regularization_factor": 0.1398779835735915, "compression/movement_sparsity/importance_threshold": -0.006024066515542498, "compression/movement_sparsity/linear_layer_sparsity": 0.028814882257906963, "compression/movement_sparsity/model_sparsity": 0.027825002407401572, "compression_loss": 15.090338706970215, "distillation_loss": 0.8271412253379822, "epoch": 2.15, "learning_rate": 4.362261669953978e-05, "loss": 15.616, "step": 2541, "task_loss": 1.4883544445037842 }, { "compression/movement_sparsity/importance_regularization_factor": 0.14064163136836716, "compression/movement_sparsity/importance_threshold": -0.006018718128892324, "compression/movement_sparsity/linear_layer_sparsity": 0.029297572563806697, "compression/movement_sparsity/model_sparsity": 0.02829111081636458, "compression_loss": 15.172521591186523, "distillation_loss": 0.6153432130813599, "epoch": 2.15, "learning_rate": 4.3617920540997466e-05, "loss": 15.7125, "step": 2542, "task_loss": 1.3548022508621216 }, { "compression/movement_sparsity/importance_regularization_factor": 0.14140482703302293, "compression/movement_sparsity/importance_threshold": -0.006013372908841613, "compression/movement_sparsity/linear_layer_sparsity": 0.029763676352524946, "compression/movement_sparsity/model_sparsity": 0.02874120250603739, "compression_loss": 15.254646301269531, "distillation_loss": 0.6039646863937378, "epoch": 2.15, "learning_rate": 4.361322438245515e-05, "loss": 15.777, "step": 2543, "task_loss": 0.4094589948654175 }, { "compression/movement_sparsity/importance_regularization_factor": 0.14216757070144403, "compression/movement_sparsity/importance_threshold": -0.00600803085445267, "compression/movement_sparsity/linear_layer_sparsity": 0.030172913785787767, "compression/movement_sparsity/model_sparsity": 0.029136381374506026, "compression_loss": 15.336736679077148, "distillation_loss": 0.49554261565208435, "epoch": 2.15, "learning_rate": 4.3608528223912845e-05, "loss": 15.8452, "step": 2544, "task_loss": 0.5987598896026611 }, { "compression/movement_sparsity/importance_regularization_factor": 0.14292986250751516, "compression/movement_sparsity/importance_threshold": -0.006002691964787801, "compression/movement_sparsity/linear_layer_sparsity": 0.030653684300698127, "compression/movement_sparsity/model_sparsity": 0.029600635943206118, "compression_loss": 15.418764114379883, "distillation_loss": 0.8125389814376831, "epoch": 2.15, "learning_rate": 4.360383206537053e-05, "loss": 16.0127, "step": 2545, "task_loss": 0.8768194913864136 }, { "compression/movement_sparsity/importance_regularization_factor": 0.1436917025851222, "compression/movement_sparsity/importance_threshold": -0.005997356238909304, "compression/movement_sparsity/linear_layer_sparsity": 0.03125201518433046, "compression/movement_sparsity/model_sparsity": 0.030178412320304587, "compression_loss": 15.500741958618164, "distillation_loss": 0.37669795751571655, "epoch": 2.15, "learning_rate": 4.359913590682821e-05, "loss": 15.9534, "step": 2546, "task_loss": 0.6257323622703552 }, { "compression/movement_sparsity/importance_regularization_factor": 0.14445309106814996, "compression/movement_sparsity/importance_threshold": -0.005992023675879487, "compression/movement_sparsity/linear_layer_sparsity": 0.03176178527493124, "compression/movement_sparsity/model_sparsity": 0.030670670240057, "compression_loss": 15.582672119140625, "distillation_loss": 0.4324830174446106, "epoch": 2.15, "learning_rate": 4.3594439748285904e-05, "loss": 15.983, "step": 2547, "task_loss": 0.44929239153862 }, { "compression/movement_sparsity/importance_regularization_factor": 0.14521402809048345, "compression/movement_sparsity/importance_threshold": -0.005986694274760654, "compression/movement_sparsity/linear_layer_sparsity": 0.03231430350650659, "compression/movement_sparsity/model_sparsity": 0.031204207770632843, "compression_loss": 15.664546966552734, "distillation_loss": 0.27873530983924866, "epoch": 2.15, "learning_rate": 4.358974358974359e-05, "loss": 16.1197, "step": 2548, "task_loss": 0.6413927674293518 }, { "compression/movement_sparsity/importance_regularization_factor": 0.145974513786008, "compression/movement_sparsity/importance_threshold": -0.0059813680346151085, "compression/movement_sparsity/linear_layer_sparsity": 0.032713250383099655, "compression/movement_sparsity/model_sparsity": 0.03158944959471079, "compression_loss": 15.746368408203125, "distillation_loss": 0.44594377279281616, "epoch": 2.15, "learning_rate": 4.3585047431201284e-05, "loss": 16.222, "step": 2549, "task_loss": 0.8348119258880615 }, { "compression/movement_sparsity/importance_regularization_factor": 0.14673454828860866, "compression/movement_sparsity/importance_threshold": -0.0059760449545051535, "compression/movement_sparsity/linear_layer_sparsity": 0.03328564620212399, "compression/movement_sparsity/model_sparsity": 0.032142181856456134, "compression_loss": 15.828140258789062, "distillation_loss": 0.667431652545929, "epoch": 2.16, "learning_rate": 4.3580351272658963e-05, "loss": 16.373, "step": 2550, "task_loss": 1.9103401899337769 }, { "compression/movement_sparsity/importance_regularization_factor": 0.14749413173217052, "compression/movement_sparsity/importance_threshold": -0.005970725033493094, "compression/movement_sparsity/linear_layer_sparsity": 0.03381852532960307, "compression/movement_sparsity/model_sparsity": 0.03265675494657837, "compression_loss": 15.909868240356445, "distillation_loss": 0.4898093342781067, "epoch": 2.16, "learning_rate": 4.3575655114116657e-05, "loss": 16.3311, "step": 2551, "task_loss": 0.4720446765422821 }, { "compression/movement_sparsity/importance_regularization_factor": 0.14825326425057883, "compression/movement_sparsity/importance_threshold": -0.0059654082706412336, "compression/movement_sparsity/linear_layer_sparsity": 0.03424742571529743, "compression/movement_sparsity/model_sparsity": 0.03307092128457221, "compression_loss": 15.991547584533691, "distillation_loss": 0.31136834621429443, "epoch": 2.16, "learning_rate": 4.357095895557434e-05, "loss": 16.537, "step": 2552, "task_loss": 0.21877436339855194 }, { "compression/movement_sparsity/importance_regularization_factor": 0.1490119459777186, "compression/movement_sparsity/importance_threshold": -0.005960094665011878, "compression/movement_sparsity/linear_layer_sparsity": 0.034558789580605084, "compression/movement_sparsity/model_sparsity": 0.03337158884323926, "compression_loss": 16.073169708251953, "distillation_loss": 0.5418140888214111, "epoch": 2.16, "learning_rate": 4.356626279703203e-05, "loss": 16.6177, "step": 2553, "task_loss": 0.9892523288726807 }, { "compression/movement_sparsity/importance_regularization_factor": 0.14977017704747486, "compression/movement_sparsity/importance_threshold": -0.005954784215667331, "compression/movement_sparsity/linear_layer_sparsity": 0.03509928825120348, "compression/movement_sparsity/model_sparsity": 0.033893519721734204, "compression_loss": 16.154754638671875, "distillation_loss": 0.42275217175483704, "epoch": 2.16, "learning_rate": 4.3561566638489716e-05, "loss": 16.628, "step": 2554, "task_loss": 0.8480641841888428 }, { "compression/movement_sparsity/importance_regularization_factor": 0.15052795759373316, "compression/movement_sparsity/importance_threshold": -0.005949476921669892, "compression/movement_sparsity/linear_layer_sparsity": 0.03564878966836696, "compression/movement_sparsity/model_sparsity": 0.034424144074754026, "compression_loss": 16.236284255981445, "distillation_loss": 0.2954394221305847, "epoch": 2.16, "learning_rate": 4.35568704799474e-05, "loss": 16.7439, "step": 2555, "task_loss": 0.40342456102371216 }, { "compression/movement_sparsity/importance_regularization_factor": 0.15128528775037842, "compression/movement_sparsity/importance_threshold": -0.005944172782081869, "compression/movement_sparsity/linear_layer_sparsity": 0.0360580032532945, "compression/movement_sparsity/model_sparsity": 0.03481929991415107, "compression_loss": 16.317771911621094, "distillation_loss": 0.6737840175628662, "epoch": 2.16, "learning_rate": 4.3552174321405095e-05, "loss": 16.9437, "step": 2556, "task_loss": 1.033182978630066 }, { "compression/movement_sparsity/importance_regularization_factor": 0.15204216765129575, "compression/movement_sparsity/importance_threshold": -0.005938871795965566, "compression/movement_sparsity/linear_layer_sparsity": 0.036564911543662676, "compression/movement_sparsity/model_sparsity": 0.0353087943453128, "compression_loss": 16.399198532104492, "distillation_loss": 0.5425746440887451, "epoch": 2.16, "learning_rate": 4.354747816286278e-05, "loss": 17.0122, "step": 2557, "task_loss": 2.0958335399627686 }, { "compression/movement_sparsity/importance_regularization_factor": 0.1527985974303704, "compression/movement_sparsity/importance_threshold": -0.005933573962383285, "compression/movement_sparsity/linear_layer_sparsity": 0.03700468677024094, "compression/movement_sparsity/model_sparsity": 0.03573346193995126, "compression_loss": 16.48058319091797, "distillation_loss": 0.5978001952171326, "epoch": 2.16, "learning_rate": 4.354278200432047e-05, "loss": 16.9338, "step": 2558, "task_loss": 0.5980600118637085 }, { "compression/movement_sparsity/importance_regularization_factor": 0.15355457722148735, "compression/movement_sparsity/importance_threshold": -0.005928279280397332, "compression/movement_sparsity/linear_layer_sparsity": 0.037483942915861546, "compression/movement_sparsity/model_sparsity": 0.03619625416260544, "compression_loss": 16.561906814575195, "distillation_loss": 0.6397451162338257, "epoch": 2.16, "learning_rate": 4.3538085845778154e-05, "loss": 17.0903, "step": 2559, "task_loss": 1.148945689201355 }, { "compression/movement_sparsity/importance_regularization_factor": 0.15431010715853188, "compression/movement_sparsity/importance_threshold": -0.00592298774907001, "compression/movement_sparsity/linear_layer_sparsity": 0.037963199061482156, "compression/movement_sparsity/model_sparsity": 0.036659046385259624, "compression_loss": 16.643184661865234, "distillation_loss": 0.5077143311500549, "epoch": 2.16, "learning_rate": 4.353338968723584e-05, "loss": 17.2154, "step": 2560, "task_loss": 0.617636501789093 }, { "compression/movement_sparsity/importance_regularization_factor": 0.1550651873753891, "compression/movement_sparsity/importance_threshold": -0.005917699367463623, "compression/movement_sparsity/linear_layer_sparsity": 0.03848062446770516, "compression/movement_sparsity/model_sparsity": 0.03715869663699214, "compression_loss": 16.724411010742188, "distillation_loss": 0.456114262342453, "epoch": 2.16, "learning_rate": 4.3528693528693534e-05, "loss": 17.2304, "step": 2561, "task_loss": 0.4709998369216919 }, { "compression/movement_sparsity/importance_regularization_factor": 0.15581981800594402, "compression/movement_sparsity/importance_threshold": -0.005912414134640475, "compression/movement_sparsity/linear_layer_sparsity": 0.03902245864507877, "compression/movement_sparsity/model_sparsity": 0.03768191714349607, "compression_loss": 16.805585861206055, "distillation_loss": 0.8539761304855347, "epoch": 2.17, "learning_rate": 4.352399737015122e-05, "loss": 17.4272, "step": 2562, "task_loss": 1.3193838596343994 }, { "compression/movement_sparsity/importance_regularization_factor": 0.156573999184082, "compression/movement_sparsity/importance_threshold": -0.00590713204966287, "compression/movement_sparsity/linear_layer_sparsity": 0.039566021826759584, "compression/movement_sparsity/model_sparsity": 0.03820680725769021, "compression_loss": 16.886709213256836, "distillation_loss": 0.38395893573760986, "epoch": 2.17, "learning_rate": 4.3519301211608906e-05, "loss": 17.2919, "step": 2563, "task_loss": 0.22457440197467804 }, { "compression/movement_sparsity/importance_regularization_factor": 0.1573277310436879, "compression/movement_sparsity/importance_threshold": -0.0059018531115931135, "compression/movement_sparsity/linear_layer_sparsity": 0.0401612881993095, "compression/movement_sparsity/model_sparsity": 0.038781624399089484, "compression_loss": 16.967763900756836, "distillation_loss": 0.4609811305999756, "epoch": 2.17, "learning_rate": 4.351460505306659e-05, "loss": 17.406, "step": 2564, "task_loss": 0.43393149971961975 }, { "compression/movement_sparsity/importance_regularization_factor": 0.1580810137186469, "compression/movement_sparsity/importance_threshold": -0.005896577319493508, "compression/movement_sparsity/linear_layer_sparsity": 0.04067735425042202, "compression/movement_sparsity/model_sparsity": 0.03927996199374141, "compression_loss": 17.04880142211914, "distillation_loss": 0.30530282855033875, "epoch": 2.17, "learning_rate": 4.350990889452428e-05, "loss": 17.5863, "step": 2565, "task_loss": 0.04473242163658142 }, { "compression/movement_sparsity/importance_regularization_factor": 0.15883384734284445, "compression/movement_sparsity/importance_threshold": -0.005891304672426357, "compression/movement_sparsity/linear_layer_sparsity": 0.04123901831857407, "compression/movement_sparsity/model_sparsity": 0.03982233117327167, "compression_loss": 17.129777908325195, "distillation_loss": 0.28446143865585327, "epoch": 2.17, "learning_rate": 4.350521273598197e-05, "loss": 17.538, "step": 2566, "task_loss": 0.4693899154663086 }, { "compression/movement_sparsity/importance_regularization_factor": 0.15958623205016553, "compression/movement_sparsity/importance_threshold": -0.005886035169453964, "compression/movement_sparsity/linear_layer_sparsity": 0.04175643180062944, "compression/movement_sparsity/model_sparsity": 0.04032196991046838, "compression_loss": 17.2106990814209, "distillation_loss": 0.34160158038139343, "epoch": 2.17, "learning_rate": 4.350051657743965e-05, "loss": 17.6081, "step": 2567, "task_loss": 0.2820066511631012 }, { "compression/movement_sparsity/importance_regularization_factor": 0.1603381679744953, "compression/movement_sparsity/importance_threshold": -0.0058807688096386345, "compression/movement_sparsity/linear_layer_sparsity": 0.042417316867679515, "compression/movement_sparsity/model_sparsity": 0.04096015154234501, "compression_loss": 17.291553497314453, "distillation_loss": 0.4809970259666443, "epoch": 2.17, "learning_rate": 4.3495820418897345e-05, "loss": 17.7206, "step": 2568, "task_loss": 0.4056711792945862 }, { "compression/movement_sparsity/importance_regularization_factor": 0.16108965524971874, "compression/movement_sparsity/importance_threshold": -0.005875505592042673, "compression/movement_sparsity/linear_layer_sparsity": 0.04317760179614214, "compression/movement_sparsity/model_sparsity": 0.04169431834460493, "compression_loss": 17.37234115600586, "distillation_loss": 0.6096224784851074, "epoch": 2.17, "learning_rate": 4.349112426035503e-05, "loss": 17.9191, "step": 2569, "task_loss": 1.2472753524780273 }, { "compression/movement_sparsity/importance_regularization_factor": 0.16184069400972123, "compression/movement_sparsity/importance_threshold": -0.005870245515728381, "compression/movement_sparsity/linear_layer_sparsity": 0.0437118045062288, "compression/movement_sparsity/model_sparsity": 0.04221016954820036, "compression_loss": 17.453109741210938, "distillation_loss": 0.271640419960022, "epoch": 2.17, "learning_rate": 4.348642810181272e-05, "loss": 17.9301, "step": 2570, "task_loss": 1.587713599205017 }, { "compression/movement_sparsity/importance_regularization_factor": 0.16259128438838777, "compression/movement_sparsity/importance_threshold": -0.005864988579758064, "compression/movement_sparsity/linear_layer_sparsity": 0.04435132146487541, "compression/movement_sparsity/model_sparsity": 0.042827717131933174, "compression_loss": 17.5338134765625, "distillation_loss": 0.4093632698059082, "epoch": 2.17, "learning_rate": 4.348173194327041e-05, "loss": 18.0738, "step": 2571, "task_loss": 0.5096733570098877 }, { "compression/movement_sparsity/importance_regularization_factor": 0.16334142651960348, "compression/movement_sparsity/importance_threshold": -0.005859734783194026, "compression/movement_sparsity/linear_layer_sparsity": 0.04491910263102466, "compression/movement_sparsity/model_sparsity": 0.04337599326832602, "compression_loss": 17.61447525024414, "distillation_loss": 0.6001753807067871, "epoch": 2.17, "learning_rate": 4.347703578472809e-05, "loss": 18.082, "step": 2572, "task_loss": 0.907132089138031 }, { "compression/movement_sparsity/importance_regularization_factor": 0.1640911205372535, "compression/movement_sparsity/importance_threshold": -0.005854484125098572, "compression/movement_sparsity/linear_layer_sparsity": 0.045450455465046356, "compression/movement_sparsity/model_sparsity": 0.04388909249786656, "compression_loss": 17.695087432861328, "distillation_loss": 0.4726676940917969, "epoch": 2.17, "learning_rate": 4.347233962618578e-05, "loss": 18.1939, "step": 2573, "task_loss": 1.0323283672332764 }, { "compression/movement_sparsity/importance_regularization_factor": 0.16484036657522272, "compression/movement_sparsity/importance_threshold": -0.005849236604534006, "compression/movement_sparsity/linear_layer_sparsity": 0.04603825730865622, "compression/movement_sparsity/model_sparsity": 0.044456701539858444, "compression_loss": 17.775634765625, "distillation_loss": 0.36013925075531006, "epoch": 2.18, "learning_rate": 4.346764346764347e-05, "loss": 18.3455, "step": 2574, "task_loss": 0.7354448437690735 }, { "compression/movement_sparsity/importance_regularization_factor": 0.16558916476739693, "compression/movement_sparsity/importance_threshold": -0.005843992220562629, "compression/movement_sparsity/linear_layer_sparsity": 0.04644158035477165, "compression/movement_sparsity/model_sparsity": 0.044846169198573, "compression_loss": 17.856159210205078, "distillation_loss": 0.6834437251091003, "epoch": 2.18, "learning_rate": 4.346294730910116e-05, "loss": 18.4358, "step": 2575, "task_loss": 0.36337849497795105 }, { "compression/movement_sparsity/importance_regularization_factor": 0.16633751524766094, "compression/movement_sparsity/importance_threshold": -0.005838750972246746, "compression/movement_sparsity/linear_layer_sparsity": 0.04698495274977029, "compression/movement_sparsity/model_sparsity": 0.045370875080194425, "compression_loss": 17.936613082885742, "distillation_loss": 0.6196240186691284, "epoch": 2.18, "learning_rate": 4.345825115055884e-05, "loss": 18.5703, "step": 2576, "task_loss": 0.7004544138908386 }, { "compression/movement_sparsity/importance_regularization_factor": 0.16708541814989974, "compression/movement_sparsity/importance_threshold": -0.005833512858648663, "compression/movement_sparsity/linear_layer_sparsity": 0.04758634814448504, "compression/movement_sparsity/model_sparsity": 0.04595161069299209, "compression_loss": 18.01702880859375, "distillation_loss": 0.37560123205184937, "epoch": 2.18, "learning_rate": 4.345355499201653e-05, "loss": 18.5944, "step": 2577, "task_loss": 0.34965047240257263 }, { "compression/movement_sparsity/importance_regularization_factor": 0.1678328736079987, "compression/movement_sparsity/importance_threshold": -0.005828277878830682, "compression/movement_sparsity/linear_layer_sparsity": 0.04800149996489525, "compression/movement_sparsity/model_sparsity": 0.04635250077121482, "compression_loss": 18.097414016723633, "distillation_loss": 0.6937552094459534, "epoch": 2.18, "learning_rate": 4.344885883347422e-05, "loss": 18.7356, "step": 2578, "task_loss": 2.303344488143921 }, { "compression/movement_sparsity/importance_regularization_factor": 0.16857988175584282, "compression/movement_sparsity/importance_threshold": -0.005823046031855108, "compression/movement_sparsity/linear_layer_sparsity": 0.04865323919536862, "compression/movement_sparsity/model_sparsity": 0.046981850754137036, "compression_loss": 18.1777286529541, "distillation_loss": 0.4613959789276123, "epoch": 2.18, "learning_rate": 4.344416267493191e-05, "loss": 18.842, "step": 2579, "task_loss": 0.3624493479728699 }, { "compression/movement_sparsity/importance_regularization_factor": 0.16932644272731723, "compression/movement_sparsity/importance_threshold": -0.005817817316784245, "compression/movement_sparsity/linear_layer_sparsity": 0.04910521284543837, "compression/movement_sparsity/model_sparsity": 0.04741829771889331, "compression_loss": 18.257991790771484, "distillation_loss": 0.6476542949676514, "epoch": 2.18, "learning_rate": 4.3439466516389594e-05, "loss": 18.9013, "step": 2580, "task_loss": 0.4365416169166565 }, { "compression/movement_sparsity/importance_regularization_factor": 0.17007255665630716, "compression/movement_sparsity/importance_threshold": -0.005812591732680396, "compression/movement_sparsity/linear_layer_sparsity": 0.04966382432667564, "compression/movement_sparsity/model_sparsity": 0.04795771917726017, "compression_loss": 18.338224411010742, "distillation_loss": 0.45096755027770996, "epoch": 2.18, "learning_rate": 4.343477035784728e-05, "loss": 18.7769, "step": 2581, "task_loss": 0.5146486759185791 }, { "compression/movement_sparsity/importance_regularization_factor": 0.17081822367669763, "compression/movement_sparsity/importance_threshold": -0.005807369278605866, "compression/movement_sparsity/linear_layer_sparsity": 0.05026387229044754, "compression/movement_sparsity/model_sparsity": 0.04853715364751305, "compression_loss": 18.418376922607422, "distillation_loss": 0.44547998905181885, "epoch": 2.18, "learning_rate": 4.3430074199304974e-05, "loss": 18.9035, "step": 2582, "task_loss": 0.38143670558929443 }, { "compression/movement_sparsity/importance_regularization_factor": 0.17156344392237388, "compression/movement_sparsity/importance_threshold": -0.00580214995362296, "compression/movement_sparsity/linear_layer_sparsity": 0.050747719240607954, "compression/movement_sparsity/model_sparsity": 0.04900437896644813, "compression_loss": 18.498502731323242, "distillation_loss": 0.5979600548744202, "epoch": 2.18, "learning_rate": 4.342537804076266e-05, "loss": 19.0264, "step": 2583, "task_loss": 0.7864080667495728 }, { "compression/movement_sparsity/importance_regularization_factor": 0.17230821752722114, "compression/movement_sparsity/importance_threshold": -0.005796933756793979, "compression/movement_sparsity/linear_layer_sparsity": 0.05135081979129463, "compression/movement_sparsity/model_sparsity": 0.04958676115786441, "compression_loss": 18.578575134277344, "distillation_loss": 1.04672372341156, "epoch": 2.18, "learning_rate": 4.3420681882220346e-05, "loss": 19.2967, "step": 2584, "task_loss": 0.7037136554718018 }, { "compression/movement_sparsity/importance_regularization_factor": 0.17305254462512398, "compression/movement_sparsity/importance_threshold": -0.005791720687181231, "compression/movement_sparsity/linear_layer_sparsity": 0.05212007169381943, "compression/movement_sparsity/model_sparsity": 0.050329586891041826, "compression_loss": 18.65860366821289, "distillation_loss": 0.44890037178993225, "epoch": 2.19, "learning_rate": 4.341598572367803e-05, "loss": 19.2456, "step": 2585, "task_loss": 0.363080769777298 }, { "compression/movement_sparsity/importance_regularization_factor": 0.17379642534996842, "compression/movement_sparsity/importance_threshold": -0.005786510743847016, "compression/movement_sparsity/linear_layer_sparsity": 0.05253063270968983, "compression/movement_sparsity/model_sparsity": 0.05072604387298366, "compression_loss": 18.738582611083984, "distillation_loss": 0.5087771415710449, "epoch": 2.19, "learning_rate": 4.341128956513572e-05, "loss": 19.3771, "step": 2586, "task_loss": 0.8138629198074341 }, { "compression/movement_sparsity/importance_regularization_factor": 0.17453985983563913, "compression/movement_sparsity/importance_threshold": -0.005781303925853639, "compression/movement_sparsity/linear_layer_sparsity": 0.05314422652789606, "compression/movement_sparsity/model_sparsity": 0.05131855885589914, "compression_loss": 18.818496704101562, "distillation_loss": 0.5859801173210144, "epoch": 2.19, "learning_rate": 4.340659340659341e-05, "loss": 19.3493, "step": 2587, "task_loss": 0.3020893335342407 }, { "compression/movement_sparsity/importance_regularization_factor": 0.17528284821602125, "compression/movement_sparsity/importance_threshold": -0.005776100232263406, "compression/movement_sparsity/linear_layer_sparsity": 0.05377917653033813, "compression/movement_sparsity/model_sparsity": 0.05193169637242265, "compression_loss": 18.898366928100586, "distillation_loss": 0.43857818841934204, "epoch": 2.19, "learning_rate": 4.34018972480511e-05, "loss": 19.4406, "step": 2588, "task_loss": 1.233670711517334 }, { "compression/movement_sparsity/importance_regularization_factor": 0.176025390625, "compression/movement_sparsity/importance_threshold": -0.005770899662138618, "compression/movement_sparsity/linear_layer_sparsity": 0.05433188554859566, "compression/movement_sparsity/model_sparsity": 0.0524654181355712, "compression_loss": 18.97818946838379, "distillation_loss": 0.6065633296966553, "epoch": 2.19, "learning_rate": 4.3397201089508785e-05, "loss": 19.3978, "step": 2589, "task_loss": 0.9234592318534851 }, { "compression/movement_sparsity/importance_regularization_factor": 0.17676748719646052, "compression/movement_sparsity/importance_threshold": -0.005765702214541581, "compression/movement_sparsity/linear_layer_sparsity": 0.0549243020450806, "compression/movement_sparsity/model_sparsity": 0.05303748330291558, "compression_loss": 19.05794906616211, "distillation_loss": 0.9864470958709717, "epoch": 2.19, "learning_rate": 4.339250493096647e-05, "loss": 19.7077, "step": 2590, "task_loss": 1.4414234161376953 }, { "compression/movement_sparsity/importance_regularization_factor": 0.17750913806428792, "compression/movement_sparsity/importance_threshold": -0.005760507888534598, "compression/movement_sparsity/linear_layer_sparsity": 0.055408339781923194, "compression/movement_sparsity/model_sparsity": 0.05350489285442337, "compression_loss": 19.137659072875977, "distillation_loss": 0.574617862701416, "epoch": 2.19, "learning_rate": 4.338780877242416e-05, "loss": 19.6646, "step": 2591, "task_loss": 0.9419476389884949 }, { "compression/movement_sparsity/importance_regularization_factor": 0.17825034336236723, "compression/movement_sparsity/importance_threshold": -0.005755316683179974, "compression/movement_sparsity/linear_layer_sparsity": 0.05591201662286204, "compression/movement_sparsity/model_sparsity": 0.05399126684638477, "compression_loss": 19.217308044433594, "distillation_loss": 0.49007704854011536, "epoch": 2.19, "learning_rate": 4.338311261388185e-05, "loss": 19.6607, "step": 2592, "task_loss": 0.2581959664821625 }, { "compression/movement_sparsity/importance_regularization_factor": 0.17899110322458378, "compression/movement_sparsity/importance_threshold": -0.0057501285975400124, "compression/movement_sparsity/linear_layer_sparsity": 0.05661430239994383, "compression/movement_sparsity/model_sparsity": 0.05466942694654005, "compression_loss": 19.296913146972656, "distillation_loss": 0.5571882724761963, "epoch": 2.19, "learning_rate": 4.337841645533953e-05, "loss": 19.7352, "step": 2593, "task_loss": 0.2809939682483673 }, { "compression/movement_sparsity/importance_regularization_factor": 0.17973141778482238, "compression/movement_sparsity/importance_threshold": -0.00574494363067702, "compression/movement_sparsity/linear_layer_sparsity": 0.057288936032278054, "compression/movement_sparsity/model_sparsity": 0.055320884838187784, "compression_loss": 19.376455307006836, "distillation_loss": 0.4118906855583191, "epoch": 2.19, "learning_rate": 4.3373720296797223e-05, "loss": 19.8222, "step": 2594, "task_loss": 0.42496564984321594 }, { "compression/movement_sparsity/importance_regularization_factor": 0.1804712871769687, "compression/movement_sparsity/importance_threshold": -0.0057397617816532945, "compression/movement_sparsity/linear_layer_sparsity": 0.05779912346874609, "compression/movement_sparsity/model_sparsity": 0.05581354576669301, "compression_loss": 19.455936431884766, "distillation_loss": 0.7936160564422607, "epoch": 2.19, "learning_rate": 4.336902413825491e-05, "loss": 20.0499, "step": 2595, "task_loss": 1.0827429294586182 }, { "compression/movement_sparsity/importance_regularization_factor": 0.18121071153490753, "compression/movement_sparsity/importance_threshold": -0.0057345830495311445, "compression/movement_sparsity/linear_layer_sparsity": 0.05847373325274504, "compression/movement_sparsity/model_sparsity": 0.05646498062926915, "compression_loss": 19.535396575927734, "distillation_loss": 0.4885663688182831, "epoch": 2.19, "learning_rate": 4.3364327979712596e-05, "loss": 20.1246, "step": 2596, "task_loss": 1.4198226928710938 }, { "compression/movement_sparsity/importance_regularization_factor": 0.18194969099252412, "compression/movement_sparsity/importance_threshold": -0.005729407433372873, "compression/movement_sparsity/linear_layer_sparsity": 0.059038700315332225, "compression/movement_sparsity/model_sparsity": 0.057010539335214494, "compression_loss": 19.61480140686035, "distillation_loss": 0.604716956615448, "epoch": 2.2, "learning_rate": 4.335963182117028e-05, "loss": 20.1506, "step": 2597, "task_loss": 0.42257821559906006 }, { "compression/movement_sparsity/importance_regularization_factor": 0.18268822568370358, "compression/movement_sparsity/importance_threshold": -0.005724234932240784, "compression/movement_sparsity/linear_layer_sparsity": 0.059786596033621194, "compression/movement_sparsity/model_sparsity": 0.05773274253478389, "compression_loss": 19.69416046142578, "distillation_loss": 0.38764089345932007, "epoch": 2.2, "learning_rate": 4.335493566262797e-05, "loss": 20.203, "step": 2598, "task_loss": 0.7139275074005127 }, { "compression/movement_sparsity/importance_regularization_factor": 0.18342631574233104, "compression/movement_sparsity/importance_threshold": -0.0057190655451971805, "compression/movement_sparsity/linear_layer_sparsity": 0.06040780939494675, "compression/movement_sparsity/model_sparsity": 0.05833261530607208, "compression_loss": 19.773475646972656, "distillation_loss": 0.687164306640625, "epoch": 2.2, "learning_rate": 4.335023950408566e-05, "loss": 20.3461, "step": 2599, "task_loss": 1.1853910684585571 }, { "compression/movement_sparsity/importance_regularization_factor": 0.18416396130229173, "compression/movement_sparsity/importance_threshold": -0.005713899271304367, "compression/movement_sparsity/linear_layer_sparsity": 0.060990853495669906, "compression/movement_sparsity/model_sparsity": 0.058895630048281945, "compression_loss": 19.85274314880371, "distillation_loss": 0.46769702434539795, "epoch": 2.2, "learning_rate": 4.334554334554335e-05, "loss": 20.2885, "step": 2600, "task_loss": 0.5585185289382935 }, { "compression/movement_sparsity/importance_regularization_factor": 0.18490116249747057, "compression/movement_sparsity/importance_threshold": -0.005708736109624649, "compression/movement_sparsity/linear_layer_sparsity": 0.06148233191311727, "compression/movement_sparsity/model_sparsity": 0.05937022467012553, "compression_loss": 19.931962966918945, "distillation_loss": 0.3417395055294037, "epoch": 2.2, "learning_rate": 4.334084718700104e-05, "loss": 20.5222, "step": 2601, "task_loss": 1.5005073547363281 }, { "compression/movement_sparsity/importance_regularization_factor": 0.1856379194617529, "compression/movement_sparsity/importance_threshold": -0.005703576059220328, "compression/movement_sparsity/linear_layer_sparsity": 0.06209763088729544, "compression/movement_sparsity/model_sparsity": 0.05996438623165963, "compression_loss": 20.011117935180664, "distillation_loss": 1.0206444263458252, "epoch": 2.2, "learning_rate": 4.333615102845872e-05, "loss": 20.757, "step": 2602, "task_loss": 0.6419742107391357 }, { "compression/movement_sparsity/importance_regularization_factor": 0.18637423232902384, "compression/movement_sparsity/importance_threshold": -0.00569841911915371, "compression/movement_sparsity/linear_layer_sparsity": 0.06260607739528863, "compression/movement_sparsity/model_sparsity": 0.06045536603793885, "compression_loss": 20.090251922607422, "distillation_loss": 0.5924590229988098, "epoch": 2.2, "learning_rate": 4.333145486991641e-05, "loss": 20.6306, "step": 2603, "task_loss": 0.24952159821987152 }, { "compression/movement_sparsity/importance_regularization_factor": 0.1871101012331684, "compression/movement_sparsity/importance_threshold": -0.005693265288487098, "compression/movement_sparsity/linear_layer_sparsity": 0.06316471272486118, "compression/movement_sparsity/model_sparsity": 0.060994810525377294, "compression_loss": 20.1693115234375, "distillation_loss": 0.450995534658432, "epoch": 2.2, "learning_rate": 4.33267587113741e-05, "loss": 20.6548, "step": 2604, "task_loss": 0.3976738154888153 }, { "compression/movement_sparsity/importance_regularization_factor": 0.1878455263080716, "compression/movement_sparsity/importance_threshold": -0.005688114566282799, "compression/movement_sparsity/linear_layer_sparsity": 0.06379965080313561, "compression/movement_sparsity/model_sparsity": 0.061607936527365, "compression_loss": 20.248327255249023, "distillation_loss": 0.6952104568481445, "epoch": 2.2, "learning_rate": 4.332206255283179e-05, "loss": 20.7686, "step": 2605, "task_loss": 0.7780268788337708 }, { "compression/movement_sparsity/importance_regularization_factor": 0.18858050768761903, "compression/movement_sparsity/importance_threshold": -0.005682966951603112, "compression/movement_sparsity/linear_layer_sparsity": 0.06444392550466893, "compression/movement_sparsity/model_sparsity": 0.06223007841087984, "compression_loss": 20.327301025390625, "distillation_loss": 0.5592168569564819, "epoch": 2.2, "learning_rate": 4.331736639428947e-05, "loss": 20.7604, "step": 2606, "task_loss": 0.8684561848640442 }, { "compression/movement_sparsity/importance_regularization_factor": 0.18931504550569556, "compression/movement_sparsity/importance_threshold": -0.005677822443510342, "compression/movement_sparsity/linear_layer_sparsity": 0.06513705352100638, "compression/movement_sparsity/model_sparsity": 0.0628993953475449, "compression_loss": 20.40620231628418, "distillation_loss": 0.3979378342628479, "epoch": 2.2, "learning_rate": 4.331267023574716e-05, "loss": 20.8494, "step": 2607, "task_loss": 0.146898552775383 }, { "compression/movement_sparsity/importance_regularization_factor": 0.19004913989618644, "compression/movement_sparsity/importance_threshold": -0.005672681041066795, "compression/movement_sparsity/linear_layer_sparsity": 0.06587883214129814, "compression/movement_sparsity/model_sparsity": 0.0636156915902517, "compression_loss": 20.48504638671875, "distillation_loss": 0.8707166910171509, "epoch": 2.2, "learning_rate": 4.330797407720485e-05, "loss": 21.0762, "step": 2608, "task_loss": 0.693575918674469 }, { "compression/movement_sparsity/importance_regularization_factor": 0.19078279099297668, "compression/movement_sparsity/importance_threshold": -0.005667542743334773, "compression/movement_sparsity/linear_layer_sparsity": 0.06658400356694781, "compression/movement_sparsity/model_sparsity": 0.06429663820806925, "compression_loss": 20.563831329345703, "distillation_loss": 0.519855260848999, "epoch": 2.21, "learning_rate": 4.330327791866254e-05, "loss": 21.0682, "step": 2609, "task_loss": 0.7062942385673523 }, { "compression/movement_sparsity/importance_regularization_factor": 0.19151599892995141, "compression/movement_sparsity/importance_threshold": -0.005662407549376584, "compression/movement_sparsity/linear_layer_sparsity": 0.06720691016007765, "compression/movement_sparsity/model_sparsity": 0.06489814604344027, "compression_loss": 20.6425838470459, "distillation_loss": 0.28868207335472107, "epoch": 2.21, "learning_rate": 4.329858176012022e-05, "loss": 20.985, "step": 2610, "task_loss": 0.3222710192203522 }, { "compression/movement_sparsity/importance_regularization_factor": 0.19224876384099587, "compression/movement_sparsity/importance_threshold": -0.005657275458254526, "compression/movement_sparsity/linear_layer_sparsity": 0.06787564132943212, "compression/movement_sparsity/model_sparsity": 0.06554390423986971, "compression_loss": 20.721256256103516, "distillation_loss": 0.4006032943725586, "epoch": 2.21, "learning_rate": 4.329388560157791e-05, "loss": 21.3493, "step": 2611, "task_loss": 1.075363039970398 }, { "compression/movement_sparsity/importance_regularization_factor": 0.19298108585999518, "compression/movement_sparsity/importance_threshold": -0.005652146469030908, "compression/movement_sparsity/linear_layer_sparsity": 0.06846785511506727, "compression/movement_sparsity/model_sparsity": 0.06611577366010557, "compression_loss": 20.799875259399414, "distillation_loss": 0.7120269536972046, "epoch": 2.21, "learning_rate": 4.32891894430356e-05, "loss": 21.5071, "step": 2612, "task_loss": 0.5701888799667358 }, { "compression/movement_sparsity/importance_regularization_factor": 0.19371296512083447, "compression/movement_sparsity/importance_threshold": -0.005647020580768032, "compression/movement_sparsity/linear_layer_sparsity": 0.06924018345284211, "compression/movement_sparsity/model_sparsity": 0.06686157014351798, "compression_loss": 20.878467559814453, "distillation_loss": 0.6383981704711914, "epoch": 2.21, "learning_rate": 4.328449328449329e-05, "loss": 21.585, "step": 2613, "task_loss": 0.15748874843120575 }, { "compression/movement_sparsity/importance_regularization_factor": 0.19444440175739863, "compression/movement_sparsity/importance_threshold": -0.005641897792528203, "compression/movement_sparsity/linear_layer_sparsity": 0.06987075728576182, "compression/movement_sparsity/model_sparsity": 0.06747048182540488, "compression_loss": 20.957014083862305, "distillation_loss": 0.5648521780967712, "epoch": 2.21, "learning_rate": 4.327979712595097e-05, "loss": 21.6449, "step": 2614, "task_loss": 0.3372357487678528 }, { "compression/movement_sparsity/importance_regularization_factor": 0.19517539590357325, "compression/movement_sparsity/importance_threshold": -0.005636778103373722, "compression/movement_sparsity/linear_layer_sparsity": 0.07051791763586301, "compression/movement_sparsity/model_sparsity": 0.068095410226582, "compression_loss": 21.03551483154297, "distillation_loss": 0.44243037700653076, "epoch": 2.21, "learning_rate": 4.3275100967408664e-05, "loss": 21.6, "step": 2615, "task_loss": 0.5873730778694153 }, { "compression/movement_sparsity/importance_regularization_factor": 0.19590594769324332, "compression/movement_sparsity/importance_threshold": -0.005631661512366895, "compression/movement_sparsity/linear_layer_sparsity": 0.07102159447680187, "compression/movement_sparsity/model_sparsity": 0.0685817842185434, "compression_loss": 21.113969802856445, "distillation_loss": 0.4282251298427582, "epoch": 2.21, "learning_rate": 4.327040480886635e-05, "loss": 21.5722, "step": 2616, "task_loss": 0.8199256062507629 }, { "compression/movement_sparsity/importance_regularization_factor": 0.19663605726029387, "compression/movement_sparsity/importance_threshold": -0.005626548018570027, "compression/movement_sparsity/linear_layer_sparsity": 0.07159568352763049, "compression/movement_sparsity/model_sparsity": 0.06913615154437157, "compression_loss": 21.1923770904541, "distillation_loss": 0.6075276136398315, "epoch": 2.21, "learning_rate": 4.3265708650324036e-05, "loss": 21.7401, "step": 2617, "task_loss": 1.2810386419296265 }, { "compression/movement_sparsity/importance_regularization_factor": 0.19736572473861014, "compression/movement_sparsity/importance_threshold": -0.00562143762104542, "compression/movement_sparsity/linear_layer_sparsity": 0.07239851388621787, "compression/movement_sparsity/model_sparsity": 0.06991140221034642, "compression_loss": 21.27072525024414, "distillation_loss": 0.6929813623428345, "epoch": 2.21, "learning_rate": 4.326101249178173e-05, "loss": 21.9489, "step": 2618, "task_loss": 0.7044781446456909 }, { "compression/movement_sparsity/importance_regularization_factor": 0.19809495026207724, "compression/movement_sparsity/importance_threshold": -0.005616330318855379, "compression/movement_sparsity/linear_layer_sparsity": 0.07310977856152946, "compression/movement_sparsity/model_sparsity": 0.07059823275595498, "compression_loss": 21.349048614501953, "distillation_loss": 0.7015190124511719, "epoch": 2.21, "learning_rate": 4.325631633323941e-05, "loss": 21.966, "step": 2619, "task_loss": 0.6157370209693909 }, { "compression/movement_sparsity/importance_regularization_factor": 0.1988237339645803, "compression/movement_sparsity/importance_threshold": -0.005611226111062208, "compression/movement_sparsity/linear_layer_sparsity": 0.07374778115088632, "compression/movement_sparsity/model_sparsity": 0.07121431799364188, "compression_loss": 21.427309036254883, "distillation_loss": 0.7665265798568726, "epoch": 2.21, "learning_rate": 4.32516201746971e-05, "loss": 22.049, "step": 2620, "task_loss": 0.8627924919128418 }, { "compression/movement_sparsity/importance_regularization_factor": 0.19955207598000446, "compression/movement_sparsity/importance_threshold": -0.00560612499672821, "compression/movement_sparsity/linear_layer_sparsity": 0.07445904582619792, "compression/movement_sparsity/model_sparsity": 0.07190114853925045, "compression_loss": 21.505512237548828, "distillation_loss": 0.7955713272094727, "epoch": 2.22, "learning_rate": 4.324692401615479e-05, "loss": 22.1575, "step": 2621, "task_loss": 0.3291466236114502 }, { "compression/movement_sparsity/importance_regularization_factor": 0.20027997644223483, "compression/movement_sparsity/importance_threshold": -0.005601026974915691, "compression/movement_sparsity/linear_layer_sparsity": 0.07520103908150712, "compression/movement_sparsity/model_sparsity": 0.07261765204360154, "compression_loss": 21.58367347717285, "distillation_loss": 0.3682970404624939, "epoch": 2.22, "learning_rate": 4.3242227857612475e-05, "loss": 22.0635, "step": 2622, "task_loss": 0.9004453420639038 }, { "compression/movement_sparsity/importance_regularization_factor": 0.20100743548515654, "compression/movement_sparsity/importance_threshold": -0.005595932044686953, "compression/movement_sparsity/linear_layer_sparsity": 0.075643676108318, "compression/movement_sparsity/model_sparsity": 0.0730450831268307, "compression_loss": 21.66181182861328, "distillation_loss": 0.625929057598114, "epoch": 2.22, "learning_rate": 4.323753169907016e-05, "loss": 22.4386, "step": 2623, "task_loss": 1.2974079847335815 }, { "compression/movement_sparsity/importance_regularization_factor": 0.20173445324265482, "compression/movement_sparsity/importance_threshold": -0.005590840205104302, "compression/movement_sparsity/linear_layer_sparsity": 0.07625420541544181, "compression/movement_sparsity/model_sparsity": 0.07363463887404698, "compression_loss": 21.739891052246094, "distillation_loss": 0.29105985164642334, "epoch": 2.22, "learning_rate": 4.323283554052785e-05, "loss": 22.4226, "step": 2624, "task_loss": 0.2771998941898346 }, { "compression/movement_sparsity/importance_regularization_factor": 0.20246102984861447, "compression/movement_sparsity/importance_threshold": -0.005585751455230042, "compression/movement_sparsity/linear_layer_sparsity": 0.07685862954873608, "compression/movement_sparsity/model_sparsity": 0.07421829917893645, "compression_loss": 21.81793212890625, "distillation_loss": 0.39496883749961853, "epoch": 2.22, "learning_rate": 4.322813938198554e-05, "loss": 22.3592, "step": 2625, "task_loss": 0.7103185653686523 }, { "compression/movement_sparsity/importance_regularization_factor": 0.20318716543692117, "compression/movement_sparsity/importance_threshold": -0.005580665794126475, "compression/movement_sparsity/linear_layer_sparsity": 0.07767674669023264, "compression/movement_sparsity/model_sparsity": 0.07500831147979989, "compression_loss": 21.895919799804688, "distillation_loss": 0.517465353012085, "epoch": 2.22, "learning_rate": 4.322344322344323e-05, "loss": 22.4793, "step": 2626, "task_loss": 0.5246644616127014 }, { "compression/movement_sparsity/importance_regularization_factor": 0.20391286014145993, "compression/movement_sparsity/importance_threshold": -0.005575583220855903, "compression/movement_sparsity/linear_layer_sparsity": 0.07824758044329667, "compression/movement_sparsity/model_sparsity": 0.07555953533735615, "compression_loss": 21.973861694335938, "distillation_loss": 0.35133275389671326, "epoch": 2.22, "learning_rate": 4.321874706490091e-05, "loss": 22.4376, "step": 2627, "task_loss": 0.5254411697387695 }, { "compression/movement_sparsity/importance_regularization_factor": 0.20463811409611565, "compression/movement_sparsity/importance_threshold": -0.005570503734480635, "compression/movement_sparsity/linear_layer_sparsity": 0.07897277254640696, "compression/movement_sparsity/model_sparsity": 0.07625981486077274, "compression_loss": 22.051734924316406, "distillation_loss": 0.5293159484863281, "epoch": 2.22, "learning_rate": 4.32140509063586e-05, "loss": 22.5723, "step": 2628, "task_loss": 1.3881436586380005 }, { "compression/movement_sparsity/importance_regularization_factor": 0.20536292743477358, "compression/movement_sparsity/importance_threshold": -0.005565427334062973, "compression/movement_sparsity/linear_layer_sparsity": 0.07953750112564142, "compression/movement_sparsity/model_sparsity": 0.07680514327600219, "compression_loss": 22.129554748535156, "distillation_loss": 0.42485934495925903, "epoch": 2.22, "learning_rate": 4.3209354747816286e-05, "loss": 22.5856, "step": 2629, "task_loss": 0.7008118033409119 }, { "compression/movement_sparsity/importance_regularization_factor": 0.20608730029131894, "compression/movement_sparsity/importance_threshold": -0.005560354018665219, "compression/movement_sparsity/linear_layer_sparsity": 0.08024131319618061, "compression/movement_sparsity/model_sparsity": 0.07748477723673916, "compression_loss": 22.207319259643555, "distillation_loss": 0.5170126557350159, "epoch": 2.22, "learning_rate": 4.320465858927398e-05, "loss": 22.6649, "step": 2630, "task_loss": 0.18735742568969727 }, { "compression/movement_sparsity/importance_regularization_factor": 0.20681123279963676, "compression/movement_sparsity/importance_threshold": -0.00555528378734968, "compression/movement_sparsity/linear_layer_sparsity": 0.08094801091528767, "compression/movement_sparsity/model_sparsity": 0.07816719771513843, "compression_loss": 22.285032272338867, "distillation_loss": 0.6301615238189697, "epoch": 2.22, "learning_rate": 4.319996243073166e-05, "loss": 22.7945, "step": 2631, "task_loss": 0.7999787926673889 }, { "compression/movement_sparsity/importance_regularization_factor": 0.20753472509361226, "compression/movement_sparsity/importance_threshold": -0.005550216639178658, "compression/movement_sparsity/linear_layer_sparsity": 0.08151272757035449, "compression/movement_sparsity/model_sparsity": 0.07871251461583208, "compression_loss": 22.362688064575195, "distillation_loss": 0.7996900081634521, "epoch": 2.22, "learning_rate": 4.319526627218935e-05, "loss": 23.0076, "step": 2632, "task_loss": 1.283111572265625 }, { "compression/movement_sparsity/importance_regularization_factor": 0.20825777730713058, "compression/movement_sparsity/importance_threshold": -0.005545152573214457, "compression/movement_sparsity/linear_layer_sparsity": 0.08229135186864107, "compression/movement_sparsity/model_sparsity": 0.07946439077414401, "compression_loss": 22.440296173095703, "distillation_loss": 0.43262556195259094, "epoch": 2.23, "learning_rate": 4.319057011364704e-05, "loss": 22.9309, "step": 2633, "task_loss": 0.22137288749217987 }, { "compression/movement_sparsity/importance_regularization_factor": 0.2089803895740765, "compression/movement_sparsity/importance_threshold": -0.005540091588519385, "compression/movement_sparsity/linear_layer_sparsity": 0.08296444728335027, "compression/movement_sparsity/model_sparsity": 0.08011436329067424, "compression_loss": 22.517831802368164, "distillation_loss": 0.7679336071014404, "epoch": 2.23, "learning_rate": 4.3185873955104724e-05, "loss": 23.2776, "step": 2634, "task_loss": 0.5178093910217285 }, { "compression/movement_sparsity/importance_regularization_factor": 0.2097025620283357, "compression/movement_sparsity/importance_threshold": -0.00553503368415574, "compression/movement_sparsity/linear_layer_sparsity": 0.08357496466630646, "compression/movement_sparsity/model_sparsity": 0.08070390752335473, "compression_loss": 22.595335006713867, "distillation_loss": 0.36907321214675903, "epoch": 2.23, "learning_rate": 4.318117779656242e-05, "loss": 23.3036, "step": 2635, "task_loss": 1.0599133968353271 }, { "compression/movement_sparsity/importance_regularization_factor": 0.2104242948037932, "compression/movement_sparsity/importance_threshold": -0.005529978859185828, "compression/movement_sparsity/linear_layer_sparsity": 0.08416126406479421, "compression/movement_sparsity/model_sparsity": 0.08127006573383651, "compression_loss": 22.67280387878418, "distillation_loss": 0.36618030071258545, "epoch": 2.23, "learning_rate": 4.31764816380201e-05, "loss": 23.2935, "step": 2636, "task_loss": 1.2144086360931396 }, { "compression/movement_sparsity/importance_regularization_factor": 0.21114558803433403, "compression/movement_sparsity/importance_threshold": -0.005524927112671954, "compression/movement_sparsity/linear_layer_sparsity": 0.08492784495376857, "compression/movement_sparsity/model_sparsity": 0.08201031221099594, "compression_loss": 22.750207901000977, "distillation_loss": 0.9153242707252502, "epoch": 2.23, "learning_rate": 4.317178547947779e-05, "loss": 23.4697, "step": 2637, "task_loss": 0.7148751616477966 }, { "compression/movement_sparsity/importance_regularization_factor": 0.2118664418538433, "compression/movement_sparsity/importance_threshold": -0.005519878443676422, "compression/movement_sparsity/linear_layer_sparsity": 0.08551243919628439, "compression/movement_sparsity/model_sparsity": 0.08257482384285911, "compression_loss": 22.82757568359375, "distillation_loss": 0.33101460337638855, "epoch": 2.23, "learning_rate": 4.3167089320935477e-05, "loss": 23.3392, "step": 2638, "task_loss": 0.04958055540919304 }, { "compression/movement_sparsity/importance_regularization_factor": 0.21258685639620623, "compression/movement_sparsity/importance_threshold": -0.005514832851261534, "compression/movement_sparsity/linear_layer_sparsity": 0.08619643330021276, "compression/movement_sparsity/model_sparsity": 0.08323532064510555, "compression_loss": 22.9049015045166, "distillation_loss": 0.6901434659957886, "epoch": 2.23, "learning_rate": 4.316239316239317e-05, "loss": 23.6203, "step": 2639, "task_loss": 0.4386698305606842 }, { "compression/movement_sparsity/importance_regularization_factor": 0.21330683179530796, "compression/movement_sparsity/importance_threshold": -0.005509790334489596, "compression/movement_sparsity/linear_layer_sparsity": 0.0868466343130611, "compression/movement_sparsity/model_sparsity": 0.08386318525291027, "compression_loss": 22.98215675354004, "distillation_loss": 0.5602027177810669, "epoch": 2.23, "learning_rate": 4.315769700385085e-05, "loss": 23.5612, "step": 2640, "task_loss": 0.577620267868042 }, { "compression/movement_sparsity/importance_regularization_factor": 0.21402636818503362, "compression/movement_sparsity/importance_threshold": -0.005504750892422913, "compression/movement_sparsity/linear_layer_sparsity": 0.0874571516960173, "compression/movement_sparsity/model_sparsity": 0.08445272948559077, "compression_loss": 23.059404373168945, "distillation_loss": 0.7182589769363403, "epoch": 2.23, "learning_rate": 4.3153000845308536e-05, "loss": 23.7557, "step": 2641, "task_loss": 1.621577501296997 }, { "compression/movement_sparsity/importance_regularization_factor": 0.21474546569926822, "compression/movement_sparsity/importance_threshold": -0.005499714524123786, "compression/movement_sparsity/linear_layer_sparsity": 0.08797609147153004, "compression/movement_sparsity/model_sparsity": 0.08495384208336917, "compression_loss": 23.13658332824707, "distillation_loss": 0.5763847231864929, "epoch": 2.23, "learning_rate": 4.314830468676623e-05, "loss": 23.7796, "step": 2642, "task_loss": 0.7239302396774292 }, { "compression/movement_sparsity/importance_regularization_factor": 0.2154641244718971, "compression/movement_sparsity/importance_threshold": -0.00549468122865452, "compression/movement_sparsity/linear_layer_sparsity": 0.08858682348950368, "compression/movement_sparsity/model_sparsity": 0.08554359357769396, "compression_loss": 23.213701248168945, "distillation_loss": 0.2924191653728485, "epoch": 2.23, "learning_rate": 4.3143608528223915e-05, "loss": 23.7901, "step": 2643, "task_loss": 0.8006916642189026 }, { "compression/movement_sparsity/importance_regularization_factor": 0.2161823446368053, "compression/movement_sparsity/importance_threshold": -0.005489651005077421, "compression/movement_sparsity/linear_layer_sparsity": 0.0893377598705398, "compression/movement_sparsity/model_sparsity": 0.08626873298389097, "compression_loss": 23.290775299072266, "distillation_loss": 0.5186960101127625, "epoch": 2.23, "learning_rate": 4.31389123696816e-05, "loss": 23.9969, "step": 2644, "task_loss": 0.6446994543075562 }, { "compression/movement_sparsity/importance_regularization_factor": 0.2169001263278777, "compression/movement_sparsity/importance_threshold": -0.005484623852454792, "compression/movement_sparsity/linear_layer_sparsity": 0.09000170944867228, "compression/movement_sparsity/model_sparsity": 0.0869098738514668, "compression_loss": 23.36781120300293, "distillation_loss": 0.5910739898681641, "epoch": 2.24, "learning_rate": 4.313421621113929e-05, "loss": 23.8928, "step": 2645, "task_loss": 0.5940378904342651 }, { "compression/movement_sparsity/importance_regularization_factor": 0.21761746967899998, "compression/movement_sparsity/importance_threshold": -0.0054795997698489345, "compression/movement_sparsity/linear_layer_sparsity": 0.09055423960441526, "compression/movement_sparsity/model_sparsity": 0.08744342289657843, "compression_loss": 23.44477081298828, "distillation_loss": 0.3057408630847931, "epoch": 2.24, "learning_rate": 4.312952005259698e-05, "loss": 23.9449, "step": 2646, "task_loss": 0.33401161432266235 }, { "compression/movement_sparsity/importance_regularization_factor": 0.21833437482405704, "compression/movement_sparsity/importance_threshold": -0.005474578756322155, "compression/movement_sparsity/linear_layer_sparsity": 0.09131432182202807, "compression/movement_sparsity/model_sparsity": 0.08817739395172984, "compression_loss": 23.52169418334961, "distillation_loss": 0.32448580861091614, "epoch": 2.24, "learning_rate": 4.312482389405467e-05, "loss": 24.0271, "step": 2647, "task_loss": 0.8096652030944824 }, { "compression/movement_sparsity/importance_regularization_factor": 0.2190508418969339, "compression/movement_sparsity/importance_threshold": -0.005469560810936756, "compression/movement_sparsity/linear_layer_sparsity": 0.09198590286744752, "compression/movement_sparsity/model_sparsity": 0.08882590412221418, "compression_loss": 23.598539352416992, "distillation_loss": 0.35537588596343994, "epoch": 2.24, "learning_rate": 4.3120127735512354e-05, "loss": 23.9784, "step": 2648, "task_loss": 0.3334835469722748 }, { "compression/movement_sparsity/importance_regularization_factor": 0.21976687103151593, "compression/movement_sparsity/importance_threshold": -0.005464545932755043, "compression/movement_sparsity/linear_layer_sparsity": 0.09271547114008016, "compression/movement_sparsity/model_sparsity": 0.08953040948026736, "compression_loss": 23.675331115722656, "distillation_loss": 0.3941245675086975, "epoch": 2.24, "learning_rate": 4.311543157697004e-05, "loss": 24.3316, "step": 2649, "task_loss": 0.7037419080734253 }, { "compression/movement_sparsity/importance_regularization_factor": 0.220482462361688, "compression/movement_sparsity/importance_threshold": -0.005459534120839319, "compression/movement_sparsity/linear_layer_sparsity": 0.09344505133688044, "compression/movement_sparsity/model_sparsity": 0.09023492635285633, "compression_loss": 23.75208282470703, "distillation_loss": 0.8570448756217957, "epoch": 2.24, "learning_rate": 4.3110735418427726e-05, "loss": 24.4649, "step": 2650, "task_loss": 0.5637337565422058 }, { "compression/movement_sparsity/importance_regularization_factor": 0.22119761602133547, "compression/movement_sparsity/importance_threshold": -0.005454525374251888, "compression/movement_sparsity/linear_layer_sparsity": 0.09423874778305875, "compression/movement_sparsity/model_sparsity": 0.09100135688441256, "compression_loss": 23.82878303527832, "distillation_loss": 0.3655344545841217, "epoch": 2.24, "learning_rate": 4.310603925988542e-05, "loss": 24.4007, "step": 2651, "task_loss": 0.6456930637359619 }, { "compression/movement_sparsity/importance_regularization_factor": 0.22191233214434336, "compression/movement_sparsity/importance_threshold": -0.005449519692055055, "compression/movement_sparsity/linear_layer_sparsity": 0.09487372163383609, "compression/movement_sparsity/model_sparsity": 0.09161451743000766, "compression_loss": 23.905481338500977, "distillation_loss": 0.5830286741256714, "epoch": 2.24, "learning_rate": 4.3101343101343106e-05, "loss": 24.5498, "step": 2652, "task_loss": 0.5962634682655334 }, { "compression/movement_sparsity/importance_regularization_factor": 0.2226266108645969, "compression/movement_sparsity/importance_threshold": -0.005444517073311122, "compression/movement_sparsity/linear_layer_sparsity": 0.09561263845389525, "compression/movement_sparsity/model_sparsity": 0.09232805018412377, "compression_loss": 23.98211669921875, "distillation_loss": 0.5124383568763733, "epoch": 2.24, "learning_rate": 4.309664694280079e-05, "loss": 24.5545, "step": 2653, "task_loss": 0.18641330301761627 }, { "compression/movement_sparsity/importance_regularization_factor": 0.2233404523159811, "compression/movement_sparsity/importance_threshold": -0.005439517517082395, "compression/movement_sparsity/linear_layer_sparsity": 0.09640648991425284, "compression/movement_sparsity/model_sparsity": 0.09309463040464531, "compression_loss": 24.058698654174805, "distillation_loss": 0.3970463275909424, "epoch": 2.24, "learning_rate": 4.309195078425848e-05, "loss": 24.4963, "step": 2654, "task_loss": 1.1362547874450684 }, { "compression/movement_sparsity/importance_regularization_factor": 0.2240538566323813, "compression/movement_sparsity/importance_threshold": -0.005434521022431176, "compression/movement_sparsity/linear_layer_sparsity": 0.09717575374094527, "compression/movement_sparsity/model_sparsity": 0.09383746765235852, "compression_loss": 24.13520050048828, "distillation_loss": 0.27783599495887756, "epoch": 2.24, "learning_rate": 4.3087254625716165e-05, "loss": 24.6677, "step": 2655, "task_loss": 1.9453346729278564 }, { "compression/movement_sparsity/importance_regularization_factor": 0.2247668239476821, "compression/movement_sparsity/importance_threshold": -0.005429527588419774, "compression/movement_sparsity/linear_layer_sparsity": 0.09798030117967221, "compression/movement_sparsity/model_sparsity": 0.09461437641148777, "compression_loss": 24.211641311645508, "distillation_loss": 0.5363422632217407, "epoch": 2.24, "learning_rate": 4.308255846717386e-05, "loss": 24.9002, "step": 2656, "task_loss": 1.1082571744918823 }, { "compression/movement_sparsity/importance_regularization_factor": 0.22547935439576938, "compression/movement_sparsity/importance_threshold": -0.005424537214110486, "compression/movement_sparsity/linear_layer_sparsity": 0.0987314283473905, "compression/movement_sparsity/model_sparsity": 0.09533970005025749, "compression_loss": 24.288034439086914, "distillation_loss": 0.6401575207710266, "epoch": 2.25, "learning_rate": 4.307786230863154e-05, "loss": 24.8531, "step": 2657, "task_loss": 1.1238659620285034 }, { "compression/movement_sparsity/importance_regularization_factor": 0.22619144811052805, "compression/movement_sparsity/importance_threshold": -0.005419549898565619, "compression/movement_sparsity/linear_layer_sparsity": 0.0994396404357873, "compression/movement_sparsity/model_sparsity": 0.09602358287470265, "compression_loss": 24.364395141601562, "distillation_loss": 0.5819977521896362, "epoch": 2.25, "learning_rate": 4.307316615008923e-05, "loss": 25.0142, "step": 2658, "task_loss": 0.9651921391487122 }, { "compression/movement_sparsity/importance_regularization_factor": 0.22690310522584312, "compression/movement_sparsity/importance_threshold": -0.005414565640847477, "compression/movement_sparsity/linear_layer_sparsity": 0.10020909504916191, "compression/movement_sparsity/model_sparsity": 0.09676660435498857, "compression_loss": 24.44072151184082, "distillation_loss": 0.40604087710380554, "epoch": 2.25, "learning_rate": 4.306846999154692e-05, "loss": 25.0095, "step": 2659, "task_loss": 1.1372036933898926 }, { "compression/movement_sparsity/importance_regularization_factor": 0.2276143258755997, "compression/movement_sparsity/importance_threshold": -0.005409584440018365, "compression/movement_sparsity/linear_layer_sparsity": 0.10101194925608457, "compression/movement_sparsity/model_sparsity": 0.09754187805003502, "compression_loss": 24.51698112487793, "distillation_loss": 0.6189146041870117, "epoch": 2.25, "learning_rate": 4.30637738330046e-05, "loss": 25.2917, "step": 2660, "task_loss": 0.4809848964214325 }, { "compression/movement_sparsity/importance_regularization_factor": 0.22832511019368307, "compression/movement_sparsity/importance_threshold": -0.005404606295140585, "compression/movement_sparsity/linear_layer_sparsity": 0.10173543620322292, "compression/movement_sparsity/model_sparsity": 0.09824051099483298, "compression_loss": 24.593198776245117, "distillation_loss": 0.6007993817329407, "epoch": 2.25, "learning_rate": 4.305907767446229e-05, "loss": 25.0992, "step": 2661, "task_loss": 0.20378516614437103 }, { "compression/movement_sparsity/importance_regularization_factor": 0.2290354583139782, "compression/movement_sparsity/importance_threshold": -0.005399631205276443, "compression/movement_sparsity/linear_layer_sparsity": 0.10242535661846632, "compression/movement_sparsity/model_sparsity": 0.09890673052136932, "compression_loss": 24.669363021850586, "distillation_loss": 0.668059766292572, "epoch": 2.25, "learning_rate": 4.3054381515919976e-05, "loss": 25.1931, "step": 2662, "task_loss": 0.9398033022880554 }, { "compression/movement_sparsity/importance_regularization_factor": 0.22974537037037046, "compression/movement_sparsity/importance_threshold": -0.005394659169488241, "compression/movement_sparsity/linear_layer_sparsity": 0.10314883164143704, "compression/movement_sparsity/model_sparsity": 0.09960535195163149, "compression_loss": 24.7454833984375, "distillation_loss": 0.6150960326194763, "epoch": 2.25, "learning_rate": 4.304968535737767e-05, "loss": 25.4581, "step": 2663, "task_loss": 0.9186568260192871 }, { "compression/movement_sparsity/importance_regularization_factor": 0.23045484649674486, "compression/movement_sparsity/importance_threshold": -0.0053896901868382835, "compression/movement_sparsity/linear_layer_sparsity": 0.10380839312587953, "compression/movement_sparsity/model_sparsity": 0.10024225547003493, "compression_loss": 24.821561813354492, "distillation_loss": 0.5443013310432434, "epoch": 2.25, "learning_rate": 4.3044989198835355e-05, "loss": 25.3143, "step": 2664, "task_loss": 0.8609303832054138 }, { "compression/movement_sparsity/importance_regularization_factor": 0.2311638868269862, "compression/movement_sparsity/importance_threshold": -0.0053847242563888785, "compression/movement_sparsity/linear_layer_sparsity": 0.10450153306638461, "compression/movement_sparsity/model_sparsity": 0.10091158392123578, "compression_loss": 24.8975772857666, "distillation_loss": 0.3234819769859314, "epoch": 2.25, "learning_rate": 4.304029304029304e-05, "loss": 25.3424, "step": 2665, "task_loss": 0.2265138328075409 }, { "compression/movement_sparsity/importance_regularization_factor": 0.23187249149498024, "compression/movement_sparsity/importance_threshold": -0.0053797613772023235, "compression/movement_sparsity/linear_layer_sparsity": 0.1053259461683929, "compression/movement_sparsity/model_sparsity": 0.10170767589699875, "compression_loss": 24.973535537719727, "distillation_loss": 0.7286474704742432, "epoch": 2.25, "learning_rate": 4.303559688175073e-05, "loss": 25.4991, "step": 2666, "task_loss": 1.1004260778427124 }, { "compression/movement_sparsity/importance_regularization_factor": 0.2325806606346119, "compression/movement_sparsity/importance_threshold": -0.005374801548340926, "compression/movement_sparsity/linear_layer_sparsity": 0.10605860280044324, "compression/movement_sparsity/model_sparsity": 0.10241516351982272, "compression_loss": 25.049453735351562, "distillation_loss": 1.0396184921264648, "epoch": 2.25, "learning_rate": 4.3030900723208414e-05, "loss": 25.7959, "step": 2667, "task_loss": 1.1754810810089111 }, { "compression/movement_sparsity/importance_regularization_factor": 0.23328839437976623, "compression/movement_sparsity/importance_threshold": -0.005369844768866989, "compression/movement_sparsity/linear_layer_sparsity": 0.1069381294052645, "compression/movement_sparsity/model_sparsity": 0.10326447568002804, "compression_loss": 25.125316619873047, "distillation_loss": 0.7221930027008057, "epoch": 2.26, "learning_rate": 4.302620456466611e-05, "loss": 25.7886, "step": 2668, "task_loss": 0.7294876575469971 }, { "compression/movement_sparsity/importance_regularization_factor": 0.23399569286432842, "compression/movement_sparsity/importance_threshold": -0.005364891037842817, "compression/movement_sparsity/linear_layer_sparsity": 0.10767685543864147, "compression/movement_sparsity/model_sparsity": 0.10397782420157144, "compression_loss": 25.201139450073242, "distillation_loss": 0.7972719669342041, "epoch": 2.26, "learning_rate": 4.3021508406123794e-05, "loss": 25.789, "step": 2669, "task_loss": 0.6992921829223633 }, { "compression/movement_sparsity/importance_regularization_factor": 0.2347025562221836, "compression/movement_sparsity/importance_threshold": -0.005359940354330713, "compression/movement_sparsity/linear_layer_sparsity": 0.10856211756809558, "compression/movement_sparsity/model_sparsity": 0.10483267485349394, "compression_loss": 25.276906967163086, "distillation_loss": 0.43806836009025574, "epoch": 2.26, "learning_rate": 4.301681224758148e-05, "loss": 25.7421, "step": 2670, "task_loss": 0.3824222981929779 }, { "compression/movement_sparsity/importance_regularization_factor": 0.23540898458721693, "compression/movement_sparsity/importance_threshold": -0.005354992717392983, "compression/movement_sparsity/linear_layer_sparsity": 0.10926136268243022, "compression/movement_sparsity/model_sparsity": 0.1055078987470216, "compression_loss": 25.352636337280273, "distillation_loss": 0.8374093174934387, "epoch": 2.26, "learning_rate": 4.3012116089039166e-05, "loss": 26.0694, "step": 2671, "task_loss": 0.28923991322517395 }, { "compression/movement_sparsity/importance_regularization_factor": 0.2361149780933135, "compression/movement_sparsity/importance_threshold": -0.005350048126091929, "compression/movement_sparsity/linear_layer_sparsity": 0.1100203359524529, "compression/movement_sparsity/model_sparsity": 0.10624079895034413, "compression_loss": 25.428356170654297, "distillation_loss": 0.43708139657974243, "epoch": 2.26, "learning_rate": 4.300741993049685e-05, "loss": 25.9787, "step": 2672, "task_loss": 1.574414849281311 }, { "compression/movement_sparsity/importance_regularization_factor": 0.23682053687435844, "compression/movement_sparsity/importance_threshold": -0.005345106579489856, "compression/movement_sparsity/linear_layer_sparsity": 0.1106674963025541, "compression/movement_sparsity/model_sparsity": 0.10686572735152124, "compression_loss": 25.503999710083008, "distillation_loss": 0.8075686693191528, "epoch": 2.26, "learning_rate": 4.3002723771954546e-05, "loss": 26.0629, "step": 2673, "task_loss": 0.993084192276001 }, { "compression/movement_sparsity/importance_regularization_factor": 0.23752566106423678, "compression/movement_sparsity/importance_threshold": -0.005340168076649069, "compression/movement_sparsity/linear_layer_sparsity": 0.11130551081607859, "compression/movement_sparsity/model_sparsity": 0.10748182410374395, "compression_loss": 25.579599380493164, "distillation_loss": 1.2198083400726318, "epoch": 2.26, "learning_rate": 4.2998027613412225e-05, "loss": 26.2822, "step": 2674, "task_loss": 0.8923405408859253 }, { "compression/movement_sparsity/importance_regularization_factor": 0.2382303507968342, "compression/movement_sparsity/importance_threshold": -0.0053352326166318685, "compression/movement_sparsity/linear_layer_sparsity": 0.11205972634321455, "compression/movement_sparsity/model_sparsity": 0.10821013000728444, "compression_loss": 25.655153274536133, "distillation_loss": 0.430992990732193, "epoch": 2.26, "learning_rate": 4.299333145486992e-05, "loss": 26.1573, "step": 2675, "task_loss": 0.9558709263801575 }, { "compression/movement_sparsity/importance_regularization_factor": 0.23893460620603502, "compression/movement_sparsity/importance_threshold": -0.005330300198500563, "compression/movement_sparsity/linear_layer_sparsity": 0.11275744516409181, "compression/movement_sparsity/model_sparsity": 0.1088838800402304, "compression_loss": 25.73065185546875, "distillation_loss": 0.726870059967041, "epoch": 2.26, "learning_rate": 4.2988635296327605e-05, "loss": 26.6212, "step": 2676, "task_loss": 0.8127005100250244 }, { "compression/movement_sparsity/importance_regularization_factor": 0.23963842742572505, "compression/movement_sparsity/importance_threshold": -0.005325370821317453, "compression/movement_sparsity/linear_layer_sparsity": 0.11351776586505734, "compression/movement_sparsity/model_sparsity": 0.10961808138609772, "compression_loss": 25.80610466003418, "distillation_loss": 0.4320061206817627, "epoch": 2.26, "learning_rate": 4.29839391377853e-05, "loss": 26.1872, "step": 2677, "task_loss": 0.13300257921218872 }, { "compression/movement_sparsity/importance_regularization_factor": 0.2403418145897892, "compression/movement_sparsity/importance_threshold": -0.005320444484144844, "compression/movement_sparsity/linear_layer_sparsity": 0.11421835841033484, "compression/movement_sparsity/model_sparsity": 0.11029460642217016, "compression_loss": 25.88150405883789, "distillation_loss": 0.8299775123596191, "epoch": 2.26, "learning_rate": 4.2979242979242984e-05, "loss": 26.4282, "step": 2678, "task_loss": 0.6602033376693726 }, { "compression/movement_sparsity/importance_regularization_factor": 0.2410447678321126, "compression/movement_sparsity/importance_threshold": -0.005315521186045039, "compression/movement_sparsity/linear_layer_sparsity": 0.11499066289977442, "compression/movement_sparsity/model_sparsity": 0.11104037987651098, "compression_loss": 25.956823348999023, "distillation_loss": 0.36130291223526, "epoch": 2.26, "learning_rate": 4.297454682070067e-05, "loss": 26.5739, "step": 2679, "task_loss": 0.7530396580696106 }, { "compression/movement_sparsity/importance_regularization_factor": 0.24174728728658035, "compression/movement_sparsity/importance_threshold": -0.005310600926080343, "compression/movement_sparsity/linear_layer_sparsity": 0.11569907769902103, "compression/movement_sparsity/model_sparsity": 0.11172445844806465, "compression_loss": 26.032115936279297, "distillation_loss": 0.6786223649978638, "epoch": 2.27, "learning_rate": 4.296985066215836e-05, "loss": 26.6305, "step": 2680, "task_loss": 1.134964942932129 }, { "compression/movement_sparsity/importance_regularization_factor": 0.2424493730870777, "compression/movement_sparsity/importance_threshold": -0.005305683703313059, "compression/movement_sparsity/linear_layer_sparsity": 0.1163613221211816, "compression/movement_sparsity/model_sparsity": 0.11236395273702185, "compression_loss": 26.107370376586914, "distillation_loss": 0.5917405486106873, "epoch": 2.27, "learning_rate": 4.296515450361604e-05, "loss": 26.7383, "step": 2681, "task_loss": 0.8230888843536377 }, { "compression/movement_sparsity/importance_regularization_factor": 0.24315102536748967, "compression/movement_sparsity/importance_threshold": -0.005300769516805493, "compression/movement_sparsity/linear_layer_sparsity": 0.11708786165523473, "compression/movement_sparsity/model_sparsity": 0.11306553340298323, "compression_loss": 26.182559967041016, "distillation_loss": 0.6224719285964966, "epoch": 2.27, "learning_rate": 4.2960458345073736e-05, "loss": 26.837, "step": 2682, "task_loss": 0.9494261741638184 }, { "compression/movement_sparsity/importance_regularization_factor": 0.2438522442617015, "compression/movement_sparsity/importance_threshold": -0.005295858365619946, "compression/movement_sparsity/linear_layer_sparsity": 0.11786633093934204, "compression/movement_sparsity/model_sparsity": 0.11381725987232982, "compression_loss": 26.257699966430664, "distillation_loss": 0.616378664970398, "epoch": 2.27, "learning_rate": 4.2955762186531416e-05, "loss": 26.7586, "step": 2683, "task_loss": 1.1050190925598145 }, { "compression/movement_sparsity/importance_regularization_factor": 0.24455302990359828, "compression/movement_sparsity/importance_threshold": -0.005290950248818723, "compression/movement_sparsity/linear_layer_sparsity": 0.11858961517563059, "compression/movement_sparsity/model_sparsity": 0.11451569707001928, "compression_loss": 26.33282470703125, "distillation_loss": 1.0202922821044922, "epoch": 2.27, "learning_rate": 4.295106602798911e-05, "loss": 26.9108, "step": 2684, "task_loss": 0.9176163673400879 }, { "compression/movement_sparsity/importance_regularization_factor": 0.24525338242706485, "compression/movement_sparsity/importance_threshold": -0.0052860451654641325, "compression/movement_sparsity/linear_layer_sparsity": 0.1191909986461777, "compression/movement_sparsity/model_sparsity": 0.11509642116828116, "compression_loss": 26.40787124633789, "distillation_loss": 0.37210893630981445, "epoch": 2.27, "learning_rate": 4.2946369869446796e-05, "loss": 26.9026, "step": 2685, "task_loss": 1.5308212041854858 }, { "compression/movement_sparsity/importance_regularization_factor": 0.24595330196598697, "compression/movement_sparsity/importance_threshold": -0.005281143114618471, "compression/movement_sparsity/linear_layer_sparsity": 0.11997283054555834, "compression/movement_sparsity/model_sparsity": 0.1158513947367218, "compression_loss": 26.48292350769043, "distillation_loss": 0.8762179017066956, "epoch": 2.27, "learning_rate": 4.294167371090448e-05, "loss": 27.1784, "step": 2686, "task_loss": 0.8934304714202881 }, { "compression/movement_sparsity/importance_regularization_factor": 0.24665278865424944, "compression/movement_sparsity/importance_threshold": -0.005276244095344047, "compression/movement_sparsity/linear_layer_sparsity": 0.1206138976459976, "compression/movement_sparsity/model_sparsity": 0.11647043921010791, "compression_loss": 26.557924270629883, "distillation_loss": 0.8376360535621643, "epoch": 2.27, "learning_rate": 4.293697755236217e-05, "loss": 27.3217, "step": 2687, "task_loss": 1.2399060726165771 }, { "compression/movement_sparsity/importance_regularization_factor": 0.2473518426257375, "compression/movement_sparsity/importance_threshold": -0.005271348106703163, "compression/movement_sparsity/linear_layer_sparsity": 0.1212490265109542, "compression/movement_sparsity/model_sparsity": 0.11708374944466833, "compression_loss": 26.632892608642578, "distillation_loss": 0.62364262342453, "epoch": 2.27, "learning_rate": 4.2932281393819855e-05, "loss": 27.3694, "step": 2688, "task_loss": 1.3516508340835571 }, { "compression/movement_sparsity/importance_regularization_factor": 0.24805046401433617, "compression/movement_sparsity/importance_threshold": -0.005266455147758123, "compression/movement_sparsity/linear_layer_sparsity": 0.12196339146985112, "compression/movement_sparsity/model_sparsity": 0.11777357376958347, "compression_loss": 26.70783042907715, "distillation_loss": 0.3320034146308899, "epoch": 2.27, "learning_rate": 4.292758523527755e-05, "loss": 27.2209, "step": 2689, "task_loss": 0.23361852765083313 }, { "compression/movement_sparsity/importance_regularization_factor": 0.24874865295393067, "compression/movement_sparsity/importance_threshold": -0.005261565217571232, "compression/movement_sparsity/linear_layer_sparsity": 0.12246403957221047, "compression/movement_sparsity/model_sparsity": 0.11825702306945307, "compression_loss": 26.782724380493164, "distillation_loss": 0.5519956946372986, "epoch": 2.27, "learning_rate": 4.2922889076735234e-05, "loss": 27.356, "step": 2690, "task_loss": 1.7913850545883179 }, { "compression/movement_sparsity/importance_regularization_factor": 0.24944640957840603, "compression/movement_sparsity/importance_threshold": -0.0052566783152047936, "compression/movement_sparsity/linear_layer_sparsity": 0.12316328468654511, "compression/movement_sparsity/model_sparsity": 0.11893224696298073, "compression_loss": 26.85756492614746, "distillation_loss": 0.6947104930877686, "epoch": 2.27, "learning_rate": 4.291819291819292e-05, "loss": 27.5564, "step": 2691, "task_loss": 1.0279330015182495 }, { "compression/movement_sparsity/importance_regularization_factor": 0.2501437340216476, "compression/movement_sparsity/importance_threshold": -0.005251794439721111, "compression/movement_sparsity/linear_layer_sparsity": 0.12376778036484519, "compression/movement_sparsity/model_sparsity": 0.11951597635508499, "compression_loss": 26.932369232177734, "distillation_loss": 0.5363233685493469, "epoch": 2.28, "learning_rate": 4.291349675965061e-05, "loss": 27.4958, "step": 2692, "task_loss": 0.7387153506278992 }, { "compression/movement_sparsity/importance_regularization_factor": 0.25084062641754024, "compression/movement_sparsity/importance_threshold": -0.0052469135901824894, "compression/movement_sparsity/linear_layer_sparsity": 0.1243664689735066, "compression/movement_sparsity/model_sparsity": 0.12009409816825728, "compression_loss": 27.007102966308594, "distillation_loss": 0.41162562370300293, "epoch": 2.28, "learning_rate": 4.290880060110829e-05, "loss": 27.5631, "step": 2693, "task_loss": 0.5103681087493896 }, { "compression/movement_sparsity/importance_regularization_factor": 0.25153708689996934, "compression/movement_sparsity/importance_threshold": -0.005242035765651232, "compression/movement_sparsity/linear_layer_sparsity": 0.12496022097676676, "compression/movement_sparsity/model_sparsity": 0.12066745296361064, "compression_loss": 27.08177375793457, "distillation_loss": 0.6186453104019165, "epoch": 2.28, "learning_rate": 4.2904104442565986e-05, "loss": 27.6486, "step": 2694, "task_loss": 1.090255856513977 }, { "compression/movement_sparsity/importance_regularization_factor": 0.2522331156028199, "compression/movement_sparsity/importance_threshold": -0.005237160965189642, "compression/movement_sparsity/linear_layer_sparsity": 0.12549767898461803, "compression/movement_sparsity/model_sparsity": 0.12118644763547798, "compression_loss": 27.156396865844727, "distillation_loss": 0.5647468566894531, "epoch": 2.28, "learning_rate": 4.289940828402367e-05, "loss": 27.7291, "step": 2695, "task_loss": 0.6037712693214417 }, { "compression/movement_sparsity/importance_regularization_factor": 0.25292871265997685, "compression/movement_sparsity/importance_threshold": -0.005232289187860027, "compression/movement_sparsity/linear_layer_sparsity": 0.12612195685702599, "compression/movement_sparsity/model_sparsity": 0.12178927964246537, "compression_loss": 27.23097038269043, "distillation_loss": 0.4375886619091034, "epoch": 2.28, "learning_rate": 4.289471212548136e-05, "loss": 27.8029, "step": 2696, "task_loss": 1.437133550643921 }, { "compression/movement_sparsity/importance_regularization_factor": 0.25362387820532584, "compression/movement_sparsity/importance_threshold": -0.005227420432724686, "compression/movement_sparsity/linear_layer_sparsity": 0.1266073062523085, "compression/movement_sparsity/model_sparsity": 0.12225795579291057, "compression_loss": 27.305477142333984, "distillation_loss": 1.0117186307907104, "epoch": 2.28, "learning_rate": 4.2890015966939045e-05, "loss": 28.0432, "step": 2697, "task_loss": 0.5592117309570312 }, { "compression/movement_sparsity/importance_regularization_factor": 0.2543186123727518, "compression/movement_sparsity/importance_threshold": -0.0052225546988459255, "compression/movement_sparsity/linear_layer_sparsity": 0.12726474523491182, "compression/movement_sparsity/model_sparsity": 0.12289280972394258, "compression_loss": 27.37994384765625, "distillation_loss": 0.4647761285305023, "epoch": 2.28, "learning_rate": 4.288531980839673e-05, "loss": 28.0377, "step": 2698, "task_loss": 0.0678580030798912 }, { "compression/movement_sparsity/importance_regularization_factor": 0.2550129152961398, "compression/movement_sparsity/importance_threshold": -0.005217691985286049, "compression/movement_sparsity/linear_layer_sparsity": 0.127994361204215, "compression/movement_sparsity/model_sparsity": 0.12359736114013893, "compression_loss": 27.45435905456543, "distillation_loss": 0.9605753421783447, "epoch": 2.28, "learning_rate": 4.2880623649854425e-05, "loss": 28.1652, "step": 2699, "task_loss": 2.166354179382324 }, { "compression/movement_sparsity/importance_regularization_factor": 0.255706787109375, "compression/movement_sparsity/importance_threshold": -0.005212832291107361, "compression/movement_sparsity/linear_layer_sparsity": 0.12869359439438202, "compression/movement_sparsity/model_sparsity": 0.1242725735191308, "compression_loss": 27.52870750427246, "distillation_loss": 0.3408774733543396, "epoch": 2.28, "learning_rate": 4.2875927491312104e-05, "loss": 28.0606, "step": 2700, "task_loss": 0.9944912195205688 }, { "compression/movement_sparsity/importance_regularization_factor": 0.25640022794634254, "compression/movement_sparsity/importance_threshold": -0.005207975615372165, "compression/movement_sparsity/linear_layer_sparsity": 0.1293686096000806, "compression/movement_sparsity/model_sparsity": 0.12492439987592396, "compression_loss": 27.602998733520508, "distillation_loss": 0.30047574639320374, "epoch": 2.28, "learning_rate": 4.28712313327698e-05, "loss": 27.995, "step": 2701, "task_loss": 0.11674318462610245 }, { "compression/movement_sparsity/importance_regularization_factor": 0.25709323794092764, "compression/movement_sparsity/importance_threshold": -0.005203121957142765, "compression/movement_sparsity/linear_layer_sparsity": 0.130003547678355, "compression/movement_sparsity/model_sparsity": 0.12553752587791167, "compression_loss": 27.677228927612305, "distillation_loss": 0.3178798258304596, "epoch": 2.28, "learning_rate": 4.2866535174227484e-05, "loss": 27.9639, "step": 2702, "task_loss": 0.40651875734329224 }, { "compression/movement_sparsity/importance_regularization_factor": 0.2577858172270152, "compression/movement_sparsity/importance_threshold": -0.005198271315481466, "compression/movement_sparsity/linear_layer_sparsity": 0.1306783363248685, "compression/movement_sparsity/model_sparsity": 0.12618913345852473, "compression_loss": 27.75139617919922, "distillation_loss": 0.6656696200370789, "epoch": 2.28, "learning_rate": 4.286183901568518e-05, "loss": 28.4294, "step": 2703, "task_loss": 0.8249683976173401 }, { "compression/movement_sparsity/importance_regularization_factor": 0.2584779659384907, "compression/movement_sparsity/importance_threshold": -0.0051934236894505695, "compression/movement_sparsity/linear_layer_sparsity": 0.1314443687021316, "compression/movement_sparsity/model_sparsity": 0.12692885026703762, "compression_loss": 27.825525283813477, "distillation_loss": 0.5948188304901123, "epoch": 2.29, "learning_rate": 4.2857142857142856e-05, "loss": 28.5727, "step": 2704, "task_loss": 1.3130605220794678 }, { "compression/movement_sparsity/importance_regularization_factor": 0.2591696842092388, "compression/movement_sparsity/importance_threshold": -0.005188579078112384, "compression/movement_sparsity/linear_layer_sparsity": 0.13224436110882168, "compression/movement_sparsity/model_sparsity": 0.12770136047349337, "compression_loss": 27.899625778198242, "distillation_loss": 0.6174355745315552, "epoch": 2.29, "learning_rate": 4.285244669860054e-05, "loss": 28.567, "step": 2705, "task_loss": 0.6900280117988586 }, { "compression/movement_sparsity/importance_regularization_factor": 0.2598609721731452, "compression/movement_sparsity/importance_threshold": -0.005183737480529209, "compression/movement_sparsity/linear_layer_sparsity": 0.1330290786567702, "compression/movement_sparsity/model_sparsity": 0.1284591205595963, "compression_loss": 27.973655700683594, "distillation_loss": 0.46737998723983765, "epoch": 2.29, "learning_rate": 4.2847750540058236e-05, "loss": 28.4288, "step": 2706, "task_loss": 0.6539822816848755 }, { "compression/movement_sparsity/importance_regularization_factor": 0.26055182996409476, "compression/movement_sparsity/importance_threshold": -0.005178898895763349, "compression/movement_sparsity/linear_layer_sparsity": 0.13371898714784594, "compression/movement_sparsity/model_sparsity": 0.12912532857159684, "compression_loss": 28.047645568847656, "distillation_loss": 0.3651961386203766, "epoch": 2.29, "learning_rate": 4.284305438151592e-05, "loss": 28.5704, "step": 2707, "task_loss": 0.5148100256919861 }, { "compression/movement_sparsity/importance_regularization_factor": 0.26124225771597265, "compression/movement_sparsity/importance_threshold": -0.00517406332287711, "compression/movement_sparsity/linear_layer_sparsity": 0.1344594660338654, "compression/movement_sparsity/model_sparsity": 0.12984036972990204, "compression_loss": 28.121553421020508, "distillation_loss": 0.5852566361427307, "epoch": 2.29, "learning_rate": 4.283835822297361e-05, "loss": 28.6833, "step": 2708, "task_loss": 0.4743536412715912 }, { "compression/movement_sparsity/importance_regularization_factor": 0.2619322555626641, "compression/movement_sparsity/importance_threshold": -0.0051692307609327946, "compression/movement_sparsity/linear_layer_sparsity": 0.13507759103577327, "compression/movement_sparsity/model_sparsity": 0.13043726023641944, "compression_loss": 28.195451736450195, "distillation_loss": 0.5227553844451904, "epoch": 2.29, "learning_rate": 4.2833662064431295e-05, "loss": 28.8209, "step": 2709, "task_loss": 0.5351753234863281 }, { "compression/movement_sparsity/importance_regularization_factor": 0.2626218236380541, "compression/movement_sparsity/importance_threshold": -0.005164401208992708, "compression/movement_sparsity/linear_layer_sparsity": 0.13567320320918463, "compression/movement_sparsity/model_sparsity": 0.13101241129935676, "compression_loss": 28.269306182861328, "distillation_loss": 0.3377804160118103, "epoch": 2.29, "learning_rate": 4.282896590588899e-05, "loss": 28.7456, "step": 2710, "task_loss": 0.4308357238769531 }, { "compression/movement_sparsity/importance_regularization_factor": 0.2633109620760279, "compression/movement_sparsity/importance_threshold": -0.005159574666119151, "compression/movement_sparsity/linear_layer_sparsity": 0.13635741194813045, "compression/movement_sparsity/model_sparsity": 0.1316731153632475, "compression_loss": 28.34311866760254, "distillation_loss": 0.7233242988586426, "epoch": 2.29, "learning_rate": 4.2824269747346674e-05, "loss": 28.9653, "step": 2711, "task_loss": 1.068498134613037 }, { "compression/movement_sparsity/importance_regularization_factor": 0.2639996710104705, "compression/movement_sparsity/importance_threshold": -0.005154751131374432, "compression/movement_sparsity/linear_layer_sparsity": 0.13705034917778572, "compression/movement_sparsity/model_sparsity": 0.13234224806733985, "compression_loss": 28.416860580444336, "distillation_loss": 0.5429000854492188, "epoch": 2.29, "learning_rate": 4.281957358880436e-05, "loss": 29.0215, "step": 2712, "task_loss": 0.8164119124412537 }, { "compression/movement_sparsity/importance_regularization_factor": 0.2646879505752672, "compression/movement_sparsity/importance_threshold": -0.005149930603820853, "compression/movement_sparsity/linear_layer_sparsity": 0.1377448127008984, "compression/movement_sparsity/model_sparsity": 0.13301285463201393, "compression_loss": 28.490558624267578, "distillation_loss": 0.5070241689682007, "epoch": 2.29, "learning_rate": 4.281487743026205e-05, "loss": 29.0943, "step": 2713, "task_loss": 0.33749064803123474 }, { "compression/movement_sparsity/importance_regularization_factor": 0.265375800904303, "compression/movement_sparsity/importance_threshold": -0.0051451130825207175, "compression/movement_sparsity/linear_layer_sparsity": 0.13842879488065912, "compression/movement_sparsity/model_sparsity": 0.13367333991972458, "compression_loss": 28.564218521118164, "distillation_loss": 0.47675377130508423, "epoch": 2.29, "learning_rate": 4.281018127171973e-05, "loss": 29.2126, "step": 2714, "task_loss": 0.824842095375061 }, { "compression/movement_sparsity/importance_regularization_factor": 0.2660632221314633, "compression/movement_sparsity/importance_threshold": -0.00514029856653633, "compression/movement_sparsity/linear_layer_sparsity": 0.13915836315329175, "compression/movement_sparsity/model_sparsity": 0.13437784527777777, "compression_loss": 28.637840270996094, "distillation_loss": 0.5140252113342285, "epoch": 2.29, "learning_rate": 4.2805485113177426e-05, "loss": 29.2284, "step": 2715, "task_loss": 0.7775077819824219 }, { "compression/movement_sparsity/importance_regularization_factor": 0.2667502143906326, "compression/movement_sparsity/importance_threshold": -0.005135487054929996, "compression/movement_sparsity/linear_layer_sparsity": 0.13994288991455808, "compression/movement_sparsity/model_sparsity": 0.13513542113130797, "compression_loss": 28.711400985717773, "distillation_loss": 0.742621123790741, "epoch": 2.3, "learning_rate": 4.280078895463511e-05, "loss": 29.4505, "step": 2716, "task_loss": 1.4261082410812378 }, { "compression/movement_sparsity/importance_regularization_factor": 0.26743677781569686, "compression/movement_sparsity/importance_threshold": -0.005130678546764016, "compression/movement_sparsity/linear_layer_sparsity": 0.14063892742779868, "compression/movement_sparsity/model_sparsity": 0.13580754761470692, "compression_loss": 28.784929275512695, "distillation_loss": 0.5026398301124573, "epoch": 2.3, "learning_rate": 4.27960927960928e-05, "loss": 29.3382, "step": 2717, "task_loss": 0.4673776626586914 }, { "compression/movement_sparsity/importance_regularization_factor": 0.26812291254054077, "compression/movement_sparsity/importance_threshold": -0.005125873041100695, "compression/movement_sparsity/linear_layer_sparsity": 0.14139141395062746, "compression/movement_sparsity/model_sparsity": 0.13653418391055722, "compression_loss": 28.85843276977539, "distillation_loss": 0.8410535454750061, "epoch": 2.3, "learning_rate": 4.2791396637550485e-05, "loss": 29.4094, "step": 2718, "task_loss": 0.5263946652412415 }, { "compression/movement_sparsity/importance_regularization_factor": 0.2688086186990496, "compression/movement_sparsity/importance_threshold": -0.005121070537002339, "compression/movement_sparsity/linear_layer_sparsity": 0.14202483765961213, "compression/movement_sparsity/model_sparsity": 0.137145847566499, "compression_loss": 28.931859970092773, "distillation_loss": 0.8662497997283936, "epoch": 2.3, "learning_rate": 4.278670047900817e-05, "loss": 29.472, "step": 2719, "task_loss": 1.3620120286941528 }, { "compression/movement_sparsity/importance_regularization_factor": 0.26949389642510846, "compression/movement_sparsity/importance_threshold": -0.005116271033531251, "compression/movement_sparsity/linear_layer_sparsity": 0.1428006001576661, "compression/movement_sparsity/model_sparsity": 0.13789496023622025, "compression_loss": 29.005239486694336, "distillation_loss": 0.5457649230957031, "epoch": 2.3, "learning_rate": 4.2782004320465865e-05, "loss": 29.6469, "step": 2720, "task_loss": 0.6805235147476196 }, { "compression/movement_sparsity/importance_regularization_factor": 0.2701787458526025, "compression/movement_sparsity/importance_threshold": -0.005111474529749733, "compression/movement_sparsity/linear_layer_sparsity": 0.14356551166317144, "compression/movement_sparsity/model_sparsity": 0.13863359467836844, "compression_loss": 29.07856559753418, "distillation_loss": 0.32495689392089844, "epoch": 2.3, "learning_rate": 4.2777308161923544e-05, "loss": 29.5127, "step": 2721, "task_loss": 0.1851312667131424 }, { "compression/movement_sparsity/importance_regularization_factor": 0.2708631671154169, "compression/movement_sparsity/importance_threshold": -0.0051066810247200915, "compression/movement_sparsity/linear_layer_sparsity": 0.14431035479454563, "compression/movement_sparsity/model_sparsity": 0.13935285015677443, "compression_loss": 29.151853561401367, "distillation_loss": 0.44361308217048645, "epoch": 2.3, "learning_rate": 4.277261200338124e-05, "loss": 29.5889, "step": 2722, "task_loss": 0.11154741793870926 }, { "compression/movement_sparsity/importance_regularization_factor": 0.27154716034743664, "compression/movement_sparsity/importance_threshold": -0.005101890517504631, "compression/movement_sparsity/linear_layer_sparsity": 0.14512277218391226, "compression/movement_sparsity/model_sparsity": 0.14013735850952808, "compression_loss": 29.225065231323242, "distillation_loss": 0.3395731449127197, "epoch": 2.3, "learning_rate": 4.2767915844838924e-05, "loss": 29.799, "step": 2723, "task_loss": 0.5562704205513 }, { "compression/movement_sparsity/importance_regularization_factor": 0.272230725682547, "compression/movement_sparsity/importance_threshold": -0.005097103007165652, "compression/movement_sparsity/linear_layer_sparsity": 0.14584204989987515, "compression/movement_sparsity/model_sparsity": 0.1408319268231906, "compression_loss": 29.298229217529297, "distillation_loss": 0.6963639855384827, "epoch": 2.3, "learning_rate": 4.276321968629661e-05, "loss": 29.8716, "step": 2724, "task_loss": 0.3846713900566101 }, { "compression/movement_sparsity/importance_regularization_factor": 0.27291386325463274, "compression/movement_sparsity/importance_threshold": -0.005092318492765465, "compression/movement_sparsity/linear_layer_sparsity": 0.14664526183182688, "compression/movement_sparsity/model_sparsity": 0.14160754595431085, "compression_loss": 29.371349334716797, "distillation_loss": 0.5544654726982117, "epoch": 2.3, "learning_rate": 4.27585235277543e-05, "loss": 30.005, "step": 2725, "task_loss": 0.5377510786056519 }, { "compression/movement_sparsity/importance_regularization_factor": 0.27359657319758, "compression/movement_sparsity/importance_threshold": -0.005087536973366364, "compression/movement_sparsity/linear_layer_sparsity": 0.14736566041954755, "compression/movement_sparsity/model_sparsity": 0.14230319663433805, "compression_loss": 29.444408416748047, "distillation_loss": 0.5729637145996094, "epoch": 2.3, "learning_rate": 4.275382736921198e-05, "loss": 30.0073, "step": 2726, "task_loss": 0.36497941613197327 }, { "compression/movement_sparsity/importance_regularization_factor": 0.27427885564527266, "compression/movement_sparsity/importance_threshold": -0.0050827584480306635, "compression/movement_sparsity/linear_layer_sparsity": 0.1481015246526919, "compression/movement_sparsity/model_sparsity": 0.14301378166729073, "compression_loss": 29.517473220825195, "distillation_loss": 0.9229831695556641, "epoch": 2.3, "learning_rate": 4.2749131210669676e-05, "loss": 30.2028, "step": 2727, "task_loss": 0.811104416847229 }, { "compression/movement_sparsity/importance_regularization_factor": 0.2749607107315968, "compression/movement_sparsity/importance_threshold": -0.00507798291582066, "compression/movement_sparsity/linear_layer_sparsity": 0.14883112869782747, "compression/movement_sparsity/model_sparsity": 0.1437183215689513, "compression_loss": 29.590482711791992, "distillation_loss": 0.44955721497535706, "epoch": 2.31, "learning_rate": 4.274443505212736e-05, "loss": 30.1516, "step": 2728, "task_loss": 0.3513813614845276 }, { "compression/movement_sparsity/importance_regularization_factor": 0.27564213859043707, "compression/movement_sparsity/importance_threshold": -0.005073210375798661, "compression/movement_sparsity/linear_layer_sparsity": 0.1496141291656364, "compression/movement_sparsity/model_sparsity": 0.14447442356189982, "compression_loss": 29.66345977783203, "distillation_loss": 0.4217585623264313, "epoch": 2.31, "learning_rate": 4.273973889358505e-05, "loss": 30.2841, "step": 2729, "task_loss": 0.7661373615264893 }, { "compression/movement_sparsity/importance_regularization_factor": 0.27632313935567887, "compression/movement_sparsity/importance_threshold": -0.005068440827026968, "compression/movement_sparsity/linear_layer_sparsity": 0.1502887151013001, "compression/movement_sparsity/model_sparsity": 0.14512583539540438, "compression_loss": 29.736377716064453, "distillation_loss": 0.3985757827758789, "epoch": 2.31, "learning_rate": 4.2735042735042735e-05, "loss": 30.3012, "step": 2730, "task_loss": 0.39602574706077576 }, { "compression/movement_sparsity/importance_regularization_factor": 0.2770037131612072, "compression/movement_sparsity/importance_threshold": -0.005063674268567888, "compression/movement_sparsity/linear_layer_sparsity": 0.15123041431617473, "compression/movement_sparsity/model_sparsity": 0.14603518434524246, "compression_loss": 29.809223175048828, "distillation_loss": 0.7248553037643433, "epoch": 2.31, "learning_rate": 4.273034657650042e-05, "loss": 30.6188, "step": 2731, "task_loss": 0.7075513601303101 }, { "compression/movement_sparsity/importance_regularization_factor": 0.2776838601409073, "compression/movement_sparsity/importance_threshold": -0.0050589106994837225, "compression/movement_sparsity/linear_layer_sparsity": 0.1519691999703899, "compression/movement_sparsity/model_sparsity": 0.1467485904394648, "compression_loss": 29.88204002380371, "distillation_loss": 0.5001782774925232, "epoch": 2.31, "learning_rate": 4.2725650417958114e-05, "loss": 30.4618, "step": 2732, "task_loss": 0.422024667263031 }, { "compression/movement_sparsity/importance_regularization_factor": 0.27836358042866427, "compression/movement_sparsity/importance_threshold": -0.005054150118836777, "compression/movement_sparsity/linear_layer_sparsity": 0.15271559324355674, "compression/movement_sparsity/model_sparsity": 0.14746934280752408, "compression_loss": 29.954801559448242, "distillation_loss": 0.5487746596336365, "epoch": 2.31, "learning_rate": 4.27209542594158e-05, "loss": 30.7355, "step": 2733, "task_loss": 0.8000732660293579 }, { "compression/movement_sparsity/importance_regularization_factor": 0.27904287415836315, "compression/movement_sparsity/importance_threshold": -0.005049392525689355, "compression/movement_sparsity/linear_layer_sparsity": 0.15335245111282056, "compression/movement_sparsity/model_sparsity": 0.14808432264977472, "compression_loss": 30.02753257751465, "distillation_loss": 0.47658079862594604, "epoch": 2.31, "learning_rate": 4.271625810087349e-05, "loss": 30.5457, "step": 2734, "task_loss": 0.42671340703964233 }, { "compression/movement_sparsity/importance_regularization_factor": 0.2797217414638892, "compression/movement_sparsity/importance_threshold": -0.00504463791910376, "compression/movement_sparsity/linear_layer_sparsity": 0.15396908559377395, "compression/movement_sparsity/model_sparsity": 0.1486797738393178, "compression_loss": 30.100200653076172, "distillation_loss": 0.30988606810569763, "epoch": 2.31, "learning_rate": 4.2711561942331173e-05, "loss": 30.5613, "step": 2735, "task_loss": 0.7972187995910645 }, { "compression/movement_sparsity/importance_regularization_factor": 0.2804001824791271, "compression/movement_sparsity/importance_threshold": -0.005039886298142299, "compression/movement_sparsity/linear_layer_sparsity": 0.15474139008321353, "compression/movement_sparsity/model_sparsity": 0.14942554729365862, "compression_loss": 30.17279815673828, "distillation_loss": 0.8626722097396851, "epoch": 2.31, "learning_rate": 4.270686578378886e-05, "loss": 30.8724, "step": 2736, "task_loss": 1.3828822374343872 }, { "compression/movement_sparsity/importance_regularization_factor": 0.2810781973379629, "compression/movement_sparsity/importance_threshold": -0.005035137661867271, "compression/movement_sparsity/linear_layer_sparsity": 0.15543779724565082, "compression/movement_sparsity/model_sparsity": 0.1500980307276672, "compression_loss": 30.245351791381836, "distillation_loss": 0.32391732931137085, "epoch": 2.31, "learning_rate": 4.270216962524655e-05, "loss": 30.8046, "step": 2737, "task_loss": 0.612346351146698 }, { "compression/movement_sparsity/importance_regularization_factor": 0.2817557861742811, "compression/movement_sparsity/importance_threshold": -0.0050303920093409836, "compression/movement_sparsity/linear_layer_sparsity": 0.15619793908410182, "compression/movement_sparsity/model_sparsity": 0.15083205935549757, "compression_loss": 30.317852020263672, "distillation_loss": 0.5382668972015381, "epoch": 2.31, "learning_rate": 4.269747346670423e-05, "loss": 30.8806, "step": 2738, "task_loss": 0.9251440763473511 }, { "compression/movement_sparsity/importance_regularization_factor": 0.28243294912196704, "compression/movement_sparsity/importance_threshold": -0.005025649339625739, "compression/movement_sparsity/linear_layer_sparsity": 0.15690938262192794, "compression/movement_sparsity/model_sparsity": 0.15151906261914305, "compression_loss": 30.390329360961914, "distillation_loss": 0.3689855635166168, "epoch": 2.32, "learning_rate": 4.2692777308161926e-05, "loss": 30.9945, "step": 2739, "task_loss": 0.7400779128074646 }, { "compression/movement_sparsity/importance_regularization_factor": 0.28310968631490585, "compression/movement_sparsity/importance_threshold": -0.005020909651783841, "compression/movement_sparsity/linear_layer_sparsity": 0.15775497304565755, "compression/movement_sparsity/model_sparsity": 0.15233560441047714, "compression_loss": 30.462745666503906, "distillation_loss": 0.7798584699630737, "epoch": 2.32, "learning_rate": 4.268808114961961e-05, "loss": 31.1542, "step": 2740, "task_loss": 1.557666301727295 }, { "compression/movement_sparsity/importance_regularization_factor": 0.28378599788698256, "compression/movement_sparsity/importance_threshold": -0.005016172944877595, "compression/movement_sparsity/linear_layer_sparsity": 0.15854138382541033, "compression/movement_sparsity/model_sparsity": 0.1530949995606629, "compression_loss": 30.535112380981445, "distillation_loss": 0.8682030439376831, "epoch": 2.32, "learning_rate": 4.2683384991077305e-05, "loss": 31.4185, "step": 2741, "task_loss": 1.2488949298858643 }, { "compression/movement_sparsity/importance_regularization_factor": 0.2844618839720825, "compression/movement_sparsity/importance_threshold": -0.0050114392179693035, "compression/movement_sparsity/linear_layer_sparsity": 0.15918415608182154, "compression/movement_sparsity/model_sparsity": 0.1537156906126676, "compression_loss": 30.60744285583496, "distillation_loss": 0.5128454566001892, "epoch": 2.32, "learning_rate": 4.267868883253499e-05, "loss": 31.1254, "step": 2742, "task_loss": 0.4483601152896881 }, { "compression/movement_sparsity/importance_regularization_factor": 0.2851373447040906, "compression/movement_sparsity/importance_threshold": -0.005006708470121272, "compression/movement_sparsity/linear_layer_sparsity": 0.1599839577018294, "compression/movement_sparsity/model_sparsity": 0.15448801658655065, "compression_loss": 30.679731369018555, "distillation_loss": 0.726369321346283, "epoch": 2.32, "learning_rate": 4.267399267399267e-05, "loss": 31.1988, "step": 2743, "task_loss": 2.1011123657226562 }, { "compression/movement_sparsity/importance_regularization_factor": 0.28581238021689204, "compression/movement_sparsity/importance_threshold": -0.0050019807003958035, "compression/movement_sparsity/linear_layer_sparsity": 0.16066796372992542, "compression/movement_sparsity/model_sparsity": 0.1551485249033329, "compression_loss": 30.75197410583496, "distillation_loss": 0.6059142351150513, "epoch": 2.32, "learning_rate": 4.2669296515450364e-05, "loss": 31.3472, "step": 2744, "task_loss": 0.28157737851142883 }, { "compression/movement_sparsity/importance_regularization_factor": 0.2864869906443719, "compression/movement_sparsity/importance_threshold": -0.004997255907855204, "compression/movement_sparsity/linear_layer_sparsity": 0.16140496075899521, "compression/movement_sparsity/model_sparsity": 0.15586020381718607, "compression_loss": 30.824174880981445, "distillation_loss": 0.6148947477340698, "epoch": 2.32, "learning_rate": 4.266460035690805e-05, "loss": 31.3927, "step": 2745, "task_loss": 0.8020234107971191 }, { "compression/movement_sparsity/importance_regularization_factor": 0.2871611761204158, "compression/movement_sparsity/importance_threshold": -0.004992534091561773, "compression/movement_sparsity/linear_layer_sparsity": 0.16212117396387568, "compression/movement_sparsity/model_sparsity": 0.15655181289514936, "compression_loss": 30.896345138549805, "distillation_loss": 0.4758932590484619, "epoch": 2.32, "learning_rate": 4.2659904198365744e-05, "loss": 31.6064, "step": 2746, "task_loss": 0.5901272296905518 }, { "compression/movement_sparsity/importance_regularization_factor": 0.28783493677890815, "compression/movement_sparsity/importance_threshold": -0.004987815250577819, "compression/movement_sparsity/linear_layer_sparsity": 0.16274734777893776, "compression/movement_sparsity/model_sparsity": 0.15715647571332808, "compression_loss": 30.968469619750977, "distillation_loss": 0.5715819597244263, "epoch": 2.32, "learning_rate": 4.265520803982342e-05, "loss": 31.7002, "step": 2747, "task_loss": 0.5360541343688965 }, { "compression/movement_sparsity/importance_regularization_factor": 0.2885082727537346, "compression/movement_sparsity/importance_threshold": -0.0049830993839656435, "compression/movement_sparsity/linear_layer_sparsity": 0.16332754200359595, "compression/movement_sparsity/model_sparsity": 0.15771673848148304, "compression_loss": 31.040565490722656, "distillation_loss": 0.9705923199653625, "epoch": 2.32, "learning_rate": 4.2650511881281116e-05, "loss": 32.0055, "step": 2748, "task_loss": 0.4924909174442291 }, { "compression/movement_sparsity/importance_regularization_factor": 0.28918118417878014, "compression/movement_sparsity/importance_threshold": -0.004978386490787552, "compression/movement_sparsity/linear_layer_sparsity": 0.16395792504983348, "compression/movement_sparsity/model_sparsity": 0.15832546593079724, "compression_loss": 31.11263084411621, "distillation_loss": 1.2930912971496582, "epoch": 2.32, "learning_rate": 4.26458157227388e-05, "loss": 32.1368, "step": 2749, "task_loss": 1.662111759185791 }, { "compression/movement_sparsity/importance_regularization_factor": 0.28985367118793004, "compression/movement_sparsity/importance_threshold": -0.004973676570105846, "compression/movement_sparsity/linear_layer_sparsity": 0.1646846315222335, "compression/movement_sparsity/model_sparsity": 0.15902720780025975, "compression_loss": 31.184650421142578, "distillation_loss": 0.7548186779022217, "epoch": 2.32, "learning_rate": 4.264111956419649e-05, "loss": 31.9204, "step": 2750, "task_loss": 1.3140308856964111 }, { "compression/movement_sparsity/importance_regularization_factor": 0.2905257339150692, "compression/movement_sparsity/importance_threshold": -0.004968969620982832, "compression/movement_sparsity/linear_layer_sparsity": 0.16547683744745734, "compression/movement_sparsity/model_sparsity": 0.15979219901484165, "compression_loss": 31.256622314453125, "distillation_loss": 0.9272348880767822, "epoch": 2.33, "learning_rate": 4.2636423405654175e-05, "loss": 32.0777, "step": 2751, "task_loss": 1.0073527097702026 }, { "compression/movement_sparsity/importance_regularization_factor": 0.29119737249408295, "compression/movement_sparsity/importance_threshold": -0.004964265642480812, "compression/movement_sparsity/linear_layer_sparsity": 0.16614864505206187, "compression/movement_sparsity/model_sparsity": 0.16044092796150608, "compression_loss": 31.328554153442383, "distillation_loss": 1.0228999853134155, "epoch": 2.33, "learning_rate": 4.263172724711186e-05, "loss": 32.2044, "step": 2752, "task_loss": 1.1393083333969116 }, { "compression/movement_sparsity/importance_regularization_factor": 0.2918685870588563, "compression/movement_sparsity/importance_threshold": -0.004959564633662093, "compression/movement_sparsity/linear_layer_sparsity": 0.166756348331456, "compression/movement_sparsity/model_sparsity": 0.16102775476373907, "compression_loss": 31.40045166015625, "distillation_loss": 0.24133244156837463, "epoch": 2.33, "learning_rate": 4.2627031088569555e-05, "loss": 31.9701, "step": 2753, "task_loss": 0.4456300735473633 }, { "compression/movement_sparsity/importance_regularization_factor": 0.29253937774327454, "compression/movement_sparsity/importance_threshold": -0.004954866593588975, "compression/movement_sparsity/linear_layer_sparsity": 0.167522809978754, "compression/movement_sparsity/model_sparsity": 0.16176788609554055, "compression_loss": 31.472288131713867, "distillation_loss": 0.2931180000305176, "epoch": 2.33, "learning_rate": 4.262233493002724e-05, "loss": 32.0299, "step": 2754, "task_loss": 0.31185248494148254 }, { "compression/movement_sparsity/importance_regularization_factor": 0.29320974468122263, "compression/movement_sparsity/importance_threshold": -0.004950171521323766, "compression/movement_sparsity/linear_layer_sparsity": 0.1681213316490685, "compression/movement_sparsity/model_sparsity": 0.16234584670521174, "compression_loss": 31.544086456298828, "distillation_loss": 0.8400447368621826, "epoch": 2.33, "learning_rate": 4.261763877148493e-05, "loss": 32.294, "step": 2755, "task_loss": 1.5892164707183838 }, { "compression/movement_sparsity/importance_regularization_factor": 0.2938796880065856, "compression/movement_sparsity/importance_threshold": -0.004945479415928769, "compression/movement_sparsity/linear_layer_sparsity": 0.16871813623924345, "compression/movement_sparsity/model_sparsity": 0.16292214922172849, "compression_loss": 31.615821838378906, "distillation_loss": 0.5703263878822327, "epoch": 2.33, "learning_rate": 4.2612942612942614e-05, "loss": 32.357, "step": 2756, "task_loss": 0.8560513257980347 }, { "compression/movement_sparsity/importance_regularization_factor": 0.294549207853249, "compression/movement_sparsity/importance_threshold": -0.004940790276466285, "compression/movement_sparsity/linear_layer_sparsity": 0.16926456122115688, "compression/movement_sparsity/model_sparsity": 0.16344980282451332, "compression_loss": 31.68752670288086, "distillation_loss": 0.6552695631980896, "epoch": 2.33, "learning_rate": 4.26082464544003e-05, "loss": 32.3677, "step": 2757, "task_loss": 1.5018903017044067 }, { "compression/movement_sparsity/importance_regularization_factor": 0.29521830435509777, "compression/movement_sparsity/importance_threshold": -0.00493610410199862, "compression/movement_sparsity/linear_layer_sparsity": 0.17000480162382361, "compression/movement_sparsity/model_sparsity": 0.1641646136921026, "compression_loss": 31.7591609954834, "distillation_loss": 0.6357307434082031, "epoch": 2.33, "learning_rate": 4.260355029585799e-05, "loss": 32.5979, "step": 2758, "task_loss": 0.7213913202285767 }, { "compression/movement_sparsity/importance_regularization_factor": 0.295886977646017, "compression/movement_sparsity/importance_threshold": -0.004931420891588078, "compression/movement_sparsity/linear_layer_sparsity": 0.17069354154647107, "compression/movement_sparsity/model_sparsity": 0.1648296932795953, "compression_loss": 31.830745697021484, "distillation_loss": 0.6367267370223999, "epoch": 2.33, "learning_rate": 4.259885413731568e-05, "loss": 32.5319, "step": 2759, "task_loss": 1.1770179271697998 }, { "compression/movement_sparsity/importance_regularization_factor": 0.29655522785989197, "compression/movement_sparsity/importance_threshold": -0.004926740644296964, "compression/movement_sparsity/linear_layer_sparsity": 0.17134394527016922, "compression/movement_sparsity/model_sparsity": 0.16545775363450854, "compression_loss": 31.902311325073242, "distillation_loss": 0.6723724603652954, "epoch": 2.33, "learning_rate": 4.2594157978773366e-05, "loss": 32.4794, "step": 2760, "task_loss": 1.36005699634552 }, { "compression/movement_sparsity/importance_regularization_factor": 0.29722305513060765, "compression/movement_sparsity/importance_threshold": -0.00492206335918758, "compression/movement_sparsity/linear_layer_sparsity": 0.1719671022708194, "compression/movement_sparsity/model_sparsity": 0.16605950327513122, "compression_loss": 31.97381591796875, "distillation_loss": 0.5588663816452026, "epoch": 2.33, "learning_rate": 4.258946182023105e-05, "loss": 32.6599, "step": 2761, "task_loss": 0.1758635938167572 }, { "compression/movement_sparsity/importance_regularization_factor": 0.2978904595920493, "compression/movement_sparsity/importance_threshold": -0.004917389035322231, "compression/movement_sparsity/linear_layer_sparsity": 0.17272389341816471, "compression/movement_sparsity/model_sparsity": 0.16679029631840336, "compression_loss": 32.04526138305664, "distillation_loss": 0.5710480213165283, "epoch": 2.33, "learning_rate": 4.258476566168874e-05, "loss": 32.6436, "step": 2762, "task_loss": 1.478938341140747 }, { "compression/movement_sparsity/importance_regularization_factor": 0.29855744137810203, "compression/movement_sparsity/importance_threshold": -0.00491271767176322, "compression/movement_sparsity/linear_layer_sparsity": 0.17366256389445986, "compression/movement_sparsity/model_sparsity": 0.1676967205761496, "compression_loss": 32.11666488647461, "distillation_loss": 0.581039547920227, "epoch": 2.34, "learning_rate": 4.258006950314643e-05, "loss": 32.6167, "step": 2763, "task_loss": 0.32748207449913025 }, { "compression/movement_sparsity/importance_regularization_factor": 0.299224000622651, "compression/movement_sparsity/importance_threshold": -0.004908049267572852, "compression/movement_sparsity/linear_layer_sparsity": 0.17435265124805016, "compression/movement_sparsity/model_sparsity": 0.16836310130618706, "compression_loss": 32.187984466552734, "distillation_loss": 0.7095096111297607, "epoch": 2.34, "learning_rate": 4.257537334460411e-05, "loss": 32.7396, "step": 2764, "task_loss": 0.3692638874053955 }, { "compression/movement_sparsity/importance_regularization_factor": 0.29989013745958093, "compression/movement_sparsity/importance_threshold": -0.004903383821813434, "compression/movement_sparsity/linear_layer_sparsity": 0.17487615797976747, "compression/movement_sparsity/model_sparsity": 0.16886862397117477, "compression_loss": 32.259281158447266, "distillation_loss": 0.4133455753326416, "epoch": 2.34, "learning_rate": 4.2570677186061804e-05, "loss": 32.7346, "step": 2765, "task_loss": 0.44254234433174133 }, { "compression/movement_sparsity/importance_regularization_factor": 0.300555852022778, "compression/movement_sparsity/importance_threshold": -0.004898721333547261, "compression/movement_sparsity/linear_layer_sparsity": 0.1754136159876187, "compression/movement_sparsity/model_sparsity": 0.16938761864304214, "compression_loss": 32.33050537109375, "distillation_loss": 0.5406585931777954, "epoch": 2.34, "learning_rate": 4.256598102751949e-05, "loss": 32.9984, "step": 2766, "task_loss": 0.42210081219673157 }, { "compression/movement_sparsity/importance_regularization_factor": 0.30122114444612613, "compression/movement_sparsity/importance_threshold": -0.004894061801836647, "compression/movement_sparsity/linear_layer_sparsity": 0.17619222836173765, "compression/movement_sparsity/model_sparsity": 0.17013948328681824, "compression_loss": 32.40168380737305, "distillation_loss": 0.6512346267700195, "epoch": 2.34, "learning_rate": 4.256128486897718e-05, "loss": 33.1394, "step": 2767, "task_loss": 1.1285040378570557 }, { "compression/movement_sparsity/importance_regularization_factor": 0.3018860148635112, "compression/movement_sparsity/importance_threshold": -0.004889405225743892, "compression/movement_sparsity/linear_layer_sparsity": 0.1769645924720154, "compression/movement_sparsity/model_sparsity": 0.17088531431383805, "compression_loss": 32.47282409667969, "distillation_loss": 0.6796640753746033, "epoch": 2.34, "learning_rate": 4.255658871043486e-05, "loss": 33.1129, "step": 2768, "task_loss": 0.12275484204292297 }, { "compression/movement_sparsity/importance_regularization_factor": 0.3025504634088183, "compression/movement_sparsity/importance_threshold": -0.004884751604331298, "compression/movement_sparsity/linear_layer_sparsity": 0.1777203104442735, "compression/movement_sparsity/model_sparsity": 0.17161507104888865, "compression_loss": 32.543914794921875, "distillation_loss": 0.4159230589866638, "epoch": 2.34, "learning_rate": 4.255189255189255e-05, "loss": 33.3077, "step": 2769, "task_loss": 0.3649306893348694 }, { "compression/movement_sparsity/importance_regularization_factor": 0.3032144902159324, "compression/movement_sparsity/importance_threshold": -0.004880100936661171, "compression/movement_sparsity/linear_layer_sparsity": 0.17826672350201928, "compression/movement_sparsity/model_sparsity": 0.1721427131371377, "compression_loss": 32.61498260498047, "distillation_loss": 0.6360831260681152, "epoch": 2.34, "learning_rate": 4.254719639335024e-05, "loss": 33.3857, "step": 2770, "task_loss": 0.8902615904808044 }, { "compression/movement_sparsity/importance_regularization_factor": 0.30387809541873867, "compression/movement_sparsity/importance_threshold": -0.004875453221795815, "compression/movement_sparsity/linear_layer_sparsity": 0.1789047141672085, "compression/movement_sparsity/model_sparsity": 0.1727587868602888, "compression_loss": 32.68601989746094, "distillation_loss": 0.5262614488601685, "epoch": 2.34, "learning_rate": 4.254250023480793e-05, "loss": 33.3303, "step": 2771, "task_loss": 0.7351558208465576 }, { "compression/movement_sparsity/importance_regularization_factor": 0.3045412791511223, "compression/movement_sparsity/importance_threshold": -0.004870808458797534, "compression/movement_sparsity/linear_layer_sparsity": 0.17961903142943486, "compression/movement_sparsity/model_sparsity": 0.17344856512706078, "compression_loss": 32.75699996948242, "distillation_loss": 0.6561937928199768, "epoch": 2.34, "learning_rate": 4.253780407626562e-05, "loss": 33.4392, "step": 2772, "task_loss": 0.26177799701690674 }, { "compression/movement_sparsity/importance_regularization_factor": 0.30520404154696845, "compression/movement_sparsity/importance_threshold": -0.004866166646728631, "compression/movement_sparsity/linear_layer_sparsity": 0.18038525459338017, "compression/movement_sparsity/model_sparsity": 0.17418846616814637, "compression_loss": 32.82796096801758, "distillation_loss": 0.4219472408294678, "epoch": 2.34, "learning_rate": 4.25331079177233e-05, "loss": 33.2983, "step": 2773, "task_loss": 0.7018374800682068 }, { "compression/movement_sparsity/importance_regularization_factor": 0.3058663827401621, "compression/movement_sparsity/importance_threshold": -0.004861527784651411, "compression/movement_sparsity/linear_layer_sparsity": 0.1809776353173622, "compression/movement_sparsity/model_sparsity": 0.17476049679188338, "compression_loss": 32.89884948730469, "distillation_loss": 0.5737485289573669, "epoch": 2.34, "learning_rate": 4.2528411759180995e-05, "loss": 33.5797, "step": 2774, "task_loss": 0.25343072414398193 }, { "compression/movement_sparsity/importance_regularization_factor": 0.30652830286458854, "compression/movement_sparsity/importance_threshold": -0.004856891871628177, "compression/movement_sparsity/linear_layer_sparsity": 0.18176370029625355, "compression/movement_sparsity/model_sparsity": 0.1755195580205311, "compression_loss": 32.969688415527344, "distillation_loss": 0.4943506121635437, "epoch": 2.35, "learning_rate": 4.252371560063868e-05, "loss": 33.5114, "step": 2775, "task_loss": 0.27319368720054626 }, { "compression/movement_sparsity/importance_regularization_factor": 0.3071898020541327, "compression/movement_sparsity/importance_threshold": -0.004852258906721235, "compression/movement_sparsity/linear_layer_sparsity": 0.1824946636964996, "compression/movement_sparsity/model_sparsity": 0.17622541057927224, "compression_loss": 33.04048156738281, "distillation_loss": 0.7300631999969482, "epoch": 2.35, "learning_rate": 4.251901944209637e-05, "loss": 33.6166, "step": 2776, "task_loss": 0.9830614328384399 }, { "compression/movement_sparsity/importance_regularization_factor": 0.30785088044268005, "compression/movement_sparsity/importance_threshold": -0.004847628888992888, "compression/movement_sparsity/linear_layer_sparsity": 0.18323243579646573, "compression/movement_sparsity/model_sparsity": 0.17693783793795206, "compression_loss": 33.111209869384766, "distillation_loss": 0.6662949323654175, "epoch": 2.35, "learning_rate": 4.2514323283554054e-05, "loss": 33.6301, "step": 2777, "task_loss": 0.518382728099823 }, { "compression/movement_sparsity/importance_regularization_factor": 0.3085115381641157, "compression/movement_sparsity/importance_threshold": -0.004843001817505436, "compression/movement_sparsity/linear_layer_sparsity": 0.18400340477913008, "compression/movement_sparsity/model_sparsity": 0.1776823217642839, "compression_loss": 33.18187713623047, "distillation_loss": 0.5028715133666992, "epoch": 2.35, "learning_rate": 4.250962712501174e-05, "loss": 33.7198, "step": 2778, "task_loss": 0.20833563804626465 }, { "compression/movement_sparsity/importance_regularization_factor": 0.30917177535232454, "compression/movement_sparsity/importance_threshold": -0.004838377691321189, "compression/movement_sparsity/linear_layer_sparsity": 0.1848401951671444, "compression/movement_sparsity/model_sparsity": 0.17849036582820157, "compression_loss": 33.25251388549805, "distillation_loss": 0.5396803617477417, "epoch": 2.35, "learning_rate": 4.2504930966469433e-05, "loss": 33.9177, "step": 2779, "task_loss": 0.5210409164428711 }, { "compression/movement_sparsity/importance_regularization_factor": 0.3098315921411918, "compression/movement_sparsity/importance_threshold": -0.004833756509502449, "compression/movement_sparsity/linear_layer_sparsity": 0.18568737150516956, "compression/movement_sparsity/model_sparsity": 0.1793084390527963, "compression_loss": 33.32306671142578, "distillation_loss": 0.449398934841156, "epoch": 2.35, "learning_rate": 4.250023480792712e-05, "loss": 33.9821, "step": 2780, "task_loss": 0.5510128140449524 }, { "compression/movement_sparsity/importance_regularization_factor": 0.3104909886646028, "compression/movement_sparsity/importance_threshold": -0.004829138271111519, "compression/movement_sparsity/linear_layer_sparsity": 0.18646290744403846, "compression/movement_sparsity/model_sparsity": 0.18005733294633747, "compression_loss": 33.393585205078125, "distillation_loss": 0.5781751871109009, "epoch": 2.35, "learning_rate": 4.2495538649384806e-05, "loss": 34.0225, "step": 2781, "task_loss": 1.3404947519302368 }, { "compression/movement_sparsity/importance_regularization_factor": 0.31114996505644243, "compression/movement_sparsity/importance_threshold": -0.0048245229752107025, "compression/movement_sparsity/linear_layer_sparsity": 0.1870965577122082, "compression/movement_sparsity/model_sparsity": 0.18066921537845937, "compression_loss": 33.46406555175781, "distillation_loss": 0.6171655654907227, "epoch": 2.35, "learning_rate": 4.249084249084249e-05, "loss": 34.1901, "step": 2782, "task_loss": 1.2153964042663574 }, { "compression/movement_sparsity/importance_regularization_factor": 0.311808521450596, "compression/movement_sparsity/importance_threshold": -0.0048199106208623044, "compression/movement_sparsity/linear_layer_sparsity": 0.18786372288539674, "compression/movement_sparsity/model_sparsity": 0.18141002606787274, "compression_loss": 33.534523010253906, "distillation_loss": 0.8364394307136536, "epoch": 2.35, "learning_rate": 4.248614633230018e-05, "loss": 34.2247, "step": 2783, "task_loss": 0.8670456409454346 }, { "compression/movement_sparsity/importance_regularization_factor": 0.31246665798094864, "compression/movement_sparsity/importance_threshold": -0.0048153012071286295, "compression/movement_sparsity/linear_layer_sparsity": 0.18868203081357546, "compression/movement_sparsity/model_sparsity": 0.1822002226013089, "compression_loss": 33.60490417480469, "distillation_loss": 0.6204716563224792, "epoch": 2.35, "learning_rate": 4.248145017375787e-05, "loss": 34.2097, "step": 2784, "task_loss": 0.5522196292877197 }, { "compression/movement_sparsity/importance_regularization_factor": 0.3131243747813851, "compression/movement_sparsity/importance_threshold": -0.004810694733071982, "compression/movement_sparsity/linear_layer_sparsity": 0.18930477046835842, "compression/movement_sparsity/model_sparsity": 0.18280156923317878, "compression_loss": 33.675262451171875, "distillation_loss": 0.6666215658187866, "epoch": 2.35, "learning_rate": 4.247675401521555e-05, "loss": 34.4039, "step": 2785, "task_loss": 0.4958130121231079 }, { "compression/movement_sparsity/importance_regularization_factor": 0.3137816719857913, "compression/movement_sparsity/importance_threshold": -0.004806091197754663, "compression/movement_sparsity/linear_layer_sparsity": 0.19004654908865018, "compression/movement_sparsity/model_sparsity": 0.18351786547588558, "compression_loss": 33.74558639526367, "distillation_loss": 0.807817816734314, "epoch": 2.35, "learning_rate": 4.2472057856673245e-05, "loss": 34.5087, "step": 2786, "task_loss": 1.4793214797973633 }, { "compression/movement_sparsity/importance_regularization_factor": 0.3144385497280515, "compression/movement_sparsity/importance_threshold": -0.00480149060023898, "compression/movement_sparsity/linear_layer_sparsity": 0.19077004795995617, "compression/movement_sparsity/model_sparsity": 0.18421650993521935, "compression_loss": 33.81586837768555, "distillation_loss": 1.3246328830718994, "epoch": 2.36, "learning_rate": 4.246736169813093e-05, "loss": 34.6089, "step": 2787, "task_loss": 1.1179027557373047 }, { "compression/movement_sparsity/importance_regularization_factor": 0.3150950081420516, "compression/movement_sparsity/importance_threshold": -0.004796892939587235, "compression/movement_sparsity/linear_layer_sparsity": 0.19142027282113977, "compression/movement_sparsity/model_sparsity": 0.18484439757209564, "compression_loss": 33.886131286621094, "distillation_loss": 0.7599340081214905, "epoch": 2.36, "learning_rate": 4.246266553958862e-05, "loss": 34.52, "step": 2788, "task_loss": 0.9186345338821411 }, { "compression/movement_sparsity/importance_regularization_factor": 0.31575104736167636, "compression/movement_sparsity/importance_threshold": -0.004792298214861731, "compression/movement_sparsity/linear_layer_sparsity": 0.19197165825678972, "compression/movement_sparsity/model_sparsity": 0.185376841221771, "compression_loss": 33.956336975097656, "distillation_loss": 0.8322329521179199, "epoch": 2.36, "learning_rate": 4.245796938104631e-05, "loss": 34.6703, "step": 2789, "task_loss": 0.9146638512611389 }, { "compression/movement_sparsity/importance_regularization_factor": 0.31640666752081104, "compression/movement_sparsity/importance_threshold": -0.004787706425124774, "compression/movement_sparsity/linear_layer_sparsity": 0.19267244158874938, "compression/movement_sparsity/model_sparsity": 0.18605355049041616, "compression_loss": 34.026485443115234, "distillation_loss": 0.43900004029273987, "epoch": 2.36, "learning_rate": 4.245327322250399e-05, "loss": 34.6707, "step": 2790, "task_loss": 0.6692500710487366 }, { "compression/movement_sparsity/importance_regularization_factor": 0.31706186875334064, "compression/movement_sparsity/importance_threshold": -0.004783117569438668, "compression/movement_sparsity/linear_layer_sparsity": 0.19338239460562104, "compression/movement_sparsity/model_sparsity": 0.18673911443708732, "compression_loss": 34.096580505371094, "distillation_loss": 0.5224969387054443, "epoch": 2.36, "learning_rate": 4.244857706396168e-05, "loss": 34.6197, "step": 2791, "task_loss": 0.19285303354263306 }, { "compression/movement_sparsity/importance_regularization_factor": 0.3177166511931504, "compression/movement_sparsity/importance_threshold": -0.0047785316468657146, "compression/movement_sparsity/linear_layer_sparsity": 0.19422477742825658, "compression/movement_sparsity/model_sparsity": 0.18755255881829266, "compression_loss": 34.16664123535156, "distillation_loss": 0.5225247740745544, "epoch": 2.36, "learning_rate": 4.244388090541937e-05, "loss": 34.7464, "step": 2792, "task_loss": 0.36350250244140625 }, { "compression/movement_sparsity/importance_regularization_factor": 0.31837101497412545, "compression/movement_sparsity/importance_threshold": -0.004773948656468221, "compression/movement_sparsity/linear_layer_sparsity": 0.1948902294515112, "compression/movement_sparsity/model_sparsity": 0.18819515051737862, "compression_loss": 34.23663330078125, "distillation_loss": 0.86546790599823, "epoch": 2.36, "learning_rate": 4.2439184746877056e-05, "loss": 34.9285, "step": 2793, "task_loss": 1.1289341449737549 }, { "compression/movement_sparsity/importance_regularization_factor": 0.3190249602301509, "compression/movement_sparsity/importance_threshold": -0.004769368597308489, "compression/movement_sparsity/linear_layer_sparsity": 0.1957255531669063, "compression/movement_sparsity/model_sparsity": 0.18900177829339357, "compression_loss": 34.30657958984375, "distillation_loss": 0.967835545539856, "epoch": 2.36, "learning_rate": 4.243448858833474e-05, "loss": 35.0436, "step": 2794, "task_loss": 1.7970212697982788 }, { "compression/movement_sparsity/importance_regularization_factor": 0.31967848709511204, "compression/movement_sparsity/importance_threshold": -0.004764791468448822, "compression/movement_sparsity/linear_layer_sparsity": 0.1963498787359848, "compression/movement_sparsity/model_sparsity": 0.18960465635852414, "compression_loss": 34.376487731933594, "distillation_loss": 0.27886083722114563, "epoch": 2.36, "learning_rate": 4.242979242979243e-05, "loss": 34.9547, "step": 2795, "task_loss": 0.08439888060092926 }, { "compression/movement_sparsity/importance_regularization_factor": 0.3203315957028935, "compression/movement_sparsity/importance_threshold": -0.004760217268951528, "compression/movement_sparsity/linear_layer_sparsity": 0.19716861593419843, "compression/movement_sparsity/model_sparsity": 0.1903952674152489, "compression_loss": 34.44633102416992, "distillation_loss": 1.0911598205566406, "epoch": 2.36, "learning_rate": 4.242509627125012e-05, "loss": 35.1312, "step": 2796, "task_loss": 1.0435272455215454 }, { "compression/movement_sparsity/importance_regularization_factor": 0.3209842861873812, "compression/movement_sparsity/importance_threshold": -0.004755645997878907, "compression/movement_sparsity/linear_layer_sparsity": 0.19778697941945902, "compression/movement_sparsity/model_sparsity": 0.19099238821248218, "compression_loss": 34.516136169433594, "distillation_loss": 0.4747304320335388, "epoch": 2.36, "learning_rate": 4.242040011270781e-05, "loss": 35.0524, "step": 2797, "task_loss": 0.6980056762695312 }, { "compression/movement_sparsity/importance_regularization_factor": 0.32163655868245977, "compression/movement_sparsity/importance_threshold": -0.004751077654293263, "compression/movement_sparsity/linear_layer_sparsity": 0.19867531798416319, "compression/movement_sparsity/model_sparsity": 0.1918502096146397, "compression_loss": 34.58591079711914, "distillation_loss": 0.8379515409469604, "epoch": 2.36, "learning_rate": 4.2415703954165494e-05, "loss": 35.1267, "step": 2798, "task_loss": 0.7269981503486633 }, { "compression/movement_sparsity/importance_regularization_factor": 0.3222884133220144, "compression/movement_sparsity/importance_threshold": -0.004746512237256903, "compression/movement_sparsity/linear_layer_sparsity": 0.19949038253874496, "compression/movement_sparsity/model_sparsity": 0.19263727419433974, "compression_loss": 34.65562438964844, "distillation_loss": 0.5053322315216064, "epoch": 2.37, "learning_rate": 4.241100779562318e-05, "loss": 35.1413, "step": 2799, "task_loss": 1.0725977420806885 }, { "compression/movement_sparsity/importance_regularization_factor": 0.32293985023993044, "compression/movement_sparsity/importance_threshold": -0.0047419497458321275, "compression/movement_sparsity/linear_layer_sparsity": 0.2003283414951876, "compression/movement_sparsity/model_sparsity": 0.19344644668276528, "compression_loss": 34.72529220581055, "distillation_loss": 0.9593079090118408, "epoch": 2.37, "learning_rate": 4.240631163708087e-05, "loss": 35.4406, "step": 2800, "task_loss": 0.6038237810134888 }, { "compression/movement_sparsity/importance_regularization_factor": 0.32359086957009287, "compression/movement_sparsity/importance_threshold": -0.004737390179081241, "compression/movement_sparsity/linear_layer_sparsity": 0.20102935138633235, "compression/movement_sparsity/model_sparsity": 0.19412337472759056, "compression_loss": 34.79494094848633, "distillation_loss": 0.3672041893005371, "epoch": 2.37, "learning_rate": 4.240161547853856e-05, "loss": 35.3609, "step": 2801, "task_loss": 1.2621623277664185 }, { "compression/movement_sparsity/importance_regularization_factor": 0.3242414714463868, "compression/movement_sparsity/importance_threshold": -0.00473283353606655, "compression/movement_sparsity/linear_layer_sparsity": 0.2017316848600847, "compression/movement_sparsity/model_sparsity": 0.194801580885889, "compression_loss": 34.864505767822266, "distillation_loss": 0.6510782837867737, "epoch": 2.37, "learning_rate": 4.2396919319996246e-05, "loss": 35.4972, "step": 2802, "task_loss": 0.7390516400337219 }, { "compression/movement_sparsity/importance_regularization_factor": 0.3248916560026974, "compression/movement_sparsity/importance_threshold": -0.004728279815850358, "compression/movement_sparsity/linear_layer_sparsity": 0.20242369200466434, "compression/movement_sparsity/model_sparsity": 0.19546981545618938, "compression_loss": 34.93404006958008, "distillation_loss": 0.7244553565979004, "epoch": 2.37, "learning_rate": 4.239222316145393e-05, "loss": 35.5431, "step": 2803, "task_loss": 0.527935802936554 }, { "compression/movement_sparsity/importance_regularization_factor": 0.3255414233729099, "compression/movement_sparsity/importance_threshold": -0.004723729017494966, "compression/movement_sparsity/linear_layer_sparsity": 0.20311687964183997, "compression/movement_sparsity/model_sparsity": 0.19613918996553342, "compression_loss": 35.00353240966797, "distillation_loss": 0.5351499319076538, "epoch": 2.37, "learning_rate": 4.238752700291162e-05, "loss": 35.7491, "step": 2804, "task_loss": 0.7425433397293091 }, { "compression/movement_sparsity/importance_regularization_factor": 0.3261907736909093, "compression/movement_sparsity/importance_threshold": -0.004719181140062681, "compression/movement_sparsity/linear_layer_sparsity": 0.20386778025037317, "compression/movement_sparsity/model_sparsity": 0.19686429482812304, "compression_loss": 35.073001861572266, "distillation_loss": 0.8405793905258179, "epoch": 2.37, "learning_rate": 4.238283084436931e-05, "loss": 35.8512, "step": 2805, "task_loss": 1.8521146774291992 }, { "compression/movement_sparsity/importance_regularization_factor": 0.32683970709058086, "compression/movement_sparsity/importance_threshold": -0.004714636182615806, "compression/movement_sparsity/linear_layer_sparsity": 0.20461590252784723, "compression/movement_sparsity/model_sparsity": 0.19758671680387252, "compression_loss": 35.14237976074219, "distillation_loss": 0.3683815002441406, "epoch": 2.37, "learning_rate": 4.2378134685827e-05, "loss": 35.7502, "step": 2806, "task_loss": 0.2676860988140106 }, { "compression/movement_sparsity/importance_regularization_factor": 0.32748822370580943, "compression/movement_sparsity/importance_threshold": -0.004710094144216646, "compression/movement_sparsity/linear_layer_sparsity": 0.2052920863019741, "compression/movement_sparsity/model_sparsity": 0.19823967158517355, "compression_loss": 35.211734771728516, "distillation_loss": 0.5893499851226807, "epoch": 2.37, "learning_rate": 4.237343852728468e-05, "loss": 35.8177, "step": 2807, "task_loss": 0.6533238291740417 }, { "compression/movement_sparsity/importance_regularization_factor": 0.3281363236704806, "compression/movement_sparsity/importance_threshold": -0.0047055550239275025, "compression/movement_sparsity/linear_layer_sparsity": 0.20606610787155324, "compression/movement_sparsity/model_sparsity": 0.1989871031326688, "compression_loss": 35.281063079833984, "distillation_loss": 0.8676565289497375, "epoch": 2.37, "learning_rate": 4.236874236874237e-05, "loss": 35.9563, "step": 2808, "task_loss": 1.0786687135696411 }, { "compression/movement_sparsity/importance_regularization_factor": 0.3287840071184793, "compression/movement_sparsity/importance_threshold": -0.004701018820810681, "compression/movement_sparsity/linear_layer_sparsity": 0.20678062784462944, "compression/movement_sparsity/model_sparsity": 0.19967707714654925, "compression_loss": 35.350318908691406, "distillation_loss": 0.7156251668930054, "epoch": 2.37, "learning_rate": 4.236404621020006e-05, "loss": 35.9579, "step": 2809, "task_loss": 0.6859104633331299 }, { "compression/movement_sparsity/importance_regularization_factor": 0.3294312741836908, "compression/movement_sparsity/importance_threshold": -0.004696485533928485, "compression/movement_sparsity/linear_layer_sparsity": 0.2075331143674582, "compression/movement_sparsity/model_sparsity": 0.20040371344239954, "compression_loss": 35.419551849365234, "distillation_loss": 0.758513331413269, "epoch": 2.38, "learning_rate": 4.235935005165775e-05, "loss": 36.21, "step": 2810, "task_loss": 0.7702853083610535 }, { "compression/movement_sparsity/importance_regularization_factor": 0.330078125, "compression/movement_sparsity/importance_threshold": -0.00469195516234322, "compression/movement_sparsity/linear_layer_sparsity": 0.20822302285853397, "compression/movement_sparsity/model_sparsity": 0.20106992145440009, "compression_loss": 35.48873519897461, "distillation_loss": 0.4104093015193939, "epoch": 2.38, "learning_rate": 4.235465389311543e-05, "loss": 36.2185, "step": 2811, "task_loss": 0.8773511648178101 }, { "compression/movement_sparsity/importance_regularization_factor": 0.3307245597012922, "compression/movement_sparsity/importance_threshold": -0.004687427705117188, "compression/movement_sparsity/linear_layer_sparsity": 0.20895203069528773, "compression/movement_sparsity/model_sparsity": 0.20177388562927093, "compression_loss": 35.55790710449219, "distillation_loss": 1.0391801595687866, "epoch": 2.38, "learning_rate": 4.234995773457312e-05, "loss": 36.1907, "step": 2812, "task_loss": 0.6424437761306763 }, { "compression/movement_sparsity/importance_regularization_factor": 0.33137057842145246, "compression/movement_sparsity/importance_threshold": -0.004682903161312694, "compression/movement_sparsity/linear_layer_sparsity": 0.20967874909185538, "compression/movement_sparsity/model_sparsity": 0.20247563901326923, "compression_loss": 35.62705993652344, "distillation_loss": 0.31216877698898315, "epoch": 2.38, "learning_rate": 4.234526157603081e-05, "loss": 36.0388, "step": 2813, "task_loss": 0.7739354372024536 }, { "compression/movement_sparsity/importance_regularization_factor": 0.33201618129436594, "compression/movement_sparsity/importance_threshold": -0.004678381529992042, "compression/movement_sparsity/linear_layer_sparsity": 0.2103461924511052, "compression/movement_sparsity/model_sparsity": 0.20312015363983285, "compression_loss": 35.69611358642578, "distillation_loss": 0.5482176542282104, "epoch": 2.38, "learning_rate": 4.2340565417488496e-05, "loss": 36.3657, "step": 2814, "task_loss": 0.32889318466186523 }, { "compression/movement_sparsity/importance_regularization_factor": 0.3326613684539179, "compression/movement_sparsity/importance_threshold": -0.0046738628102175345, "compression/movement_sparsity/linear_layer_sparsity": 0.21122898627785866, "compression/movement_sparsity/model_sparsity": 0.2039726207828459, "compression_loss": 35.7651252746582, "distillation_loss": 0.739136815071106, "epoch": 2.38, "learning_rate": 4.233586925894618e-05, "loss": 36.4543, "step": 2815, "task_loss": 0.7743887901306152 }, { "compression/movement_sparsity/importance_regularization_factor": 0.33330614003399306, "compression/movement_sparsity/importance_threshold": -0.004669347001051479, "compression/movement_sparsity/linear_layer_sparsity": 0.21187767292141726, "compression/movement_sparsity/model_sparsity": 0.2045990230446047, "compression_loss": 35.8340950012207, "distillation_loss": 0.7866579294204712, "epoch": 2.38, "learning_rate": 4.233117310040387e-05, "loss": 36.5839, "step": 2816, "task_loss": 0.9403125047683716 }, { "compression/movement_sparsity/importance_regularization_factor": 0.33395049616847705, "compression/movement_sparsity/importance_threshold": -0.004664834101556176, "compression/movement_sparsity/linear_layer_sparsity": 0.21260136257940543, "compression/movement_sparsity/model_sparsity": 0.20529785173651116, "compression_loss": 35.90303421020508, "distillation_loss": 0.8537001609802246, "epoch": 2.38, "learning_rate": 4.232647694186156e-05, "loss": 36.5099, "step": 2817, "task_loss": 0.6227242350578308 }, { "compression/movement_sparsity/importance_regularization_factor": 0.3345944369912549, "compression/movement_sparsity/importance_threshold": -0.004660324110793932, "compression/movement_sparsity/linear_layer_sparsity": 0.21326855553113486, "compression/movement_sparsity/model_sparsity": 0.20594212455782313, "compression_loss": 35.971900939941406, "distillation_loss": 0.3539462089538574, "epoch": 2.38, "learning_rate": 4.232178078331925e-05, "loss": 36.4416, "step": 2818, "task_loss": 0.09632567316293716 }, { "compression/movement_sparsity/importance_regularization_factor": 0.33523796263621175, "compression/movement_sparsity/importance_threshold": -0.0046558170278270476, "compression/movement_sparsity/linear_layer_sparsity": 0.21399220941662012, "compression/movement_sparsity/model_sparsity": 0.2066409187061222, "compression_loss": 36.04072952270508, "distillation_loss": 0.3503519296646118, "epoch": 2.38, "learning_rate": 4.2317084624776934e-05, "loss": 36.5733, "step": 2819, "task_loss": 0.28203102946281433 }, { "compression/movement_sparsity/importance_regularization_factor": 0.3358810732372326, "compression/movement_sparsity/importance_threshold": -0.004651312851717829, "compression/movement_sparsity/linear_layer_sparsity": 0.21466055901261025, "compression/movement_sparsity/model_sparsity": 0.20728630843740622, "compression_loss": 36.109493255615234, "distillation_loss": 0.4840245842933655, "epoch": 2.38, "learning_rate": 4.231238846623462e-05, "loss": 36.8159, "step": 2820, "task_loss": 1.1661646366119385 }, { "compression/movement_sparsity/importance_regularization_factor": 0.3365237689282028, "compression/movement_sparsity/importance_threshold": -0.00464681158152858, "compression/movement_sparsity/linear_layer_sparsity": 0.21542376536214364, "compression/movement_sparsity/model_sparsity": 0.2080232963009358, "compression_loss": 36.17823791503906, "distillation_loss": 0.5955571532249451, "epoch": 2.38, "learning_rate": 4.230769230769231e-05, "loss": 36.7818, "step": 2821, "task_loss": 0.8083208799362183 }, { "compression/movement_sparsity/importance_regularization_factor": 0.33716604984300724, "compression/movement_sparsity/importance_threshold": -0.004642313216321605, "compression/movement_sparsity/linear_layer_sparsity": 0.2162672452082017, "compression/movement_sparsity/model_sparsity": 0.20883780001943422, "compression_loss": 36.246891021728516, "distillation_loss": 1.1211678981781006, "epoch": 2.39, "learning_rate": 4.230299614915e-05, "loss": 37.1341, "step": 2822, "task_loss": 0.7611430287361145 }, { "compression/movement_sparsity/importance_regularization_factor": 0.3378079161155312, "compression/movement_sparsity/importance_threshold": -0.004637817755159208, "compression/movement_sparsity/linear_layer_sparsity": 0.21707049291265634, "compression/movement_sparsity/model_sparsity": 0.20961345369416187, "compression_loss": 36.31551742553711, "distillation_loss": 0.6499782800674438, "epoch": 2.39, "learning_rate": 4.2298299990607687e-05, "loss": 36.9344, "step": 2823, "task_loss": 1.131666898727417 }, { "compression/movement_sparsity/importance_regularization_factor": 0.33844936787965985, "compression/movement_sparsity/importance_threshold": -0.0046333251971036925, "compression/movement_sparsity/linear_layer_sparsity": 0.21771463644834566, "compression/movement_sparsity/model_sparsity": 0.21023546891778297, "compression_loss": 36.38413619995117, "distillation_loss": 1.1920796632766724, "epoch": 2.39, "learning_rate": 4.229360383206537e-05, "loss": 37.262, "step": 2824, "task_loss": 1.2239148616790771 }, { "compression/movement_sparsity/importance_regularization_factor": 0.33909040526927825, "compression/movement_sparsity/importance_threshold": -0.004628835541217362, "compression/movement_sparsity/linear_layer_sparsity": 0.2183505403841986, "compression/movement_sparsity/model_sparsity": 0.21084952759717004, "compression_loss": 36.45266342163086, "distillation_loss": 0.4528842270374298, "epoch": 2.39, "learning_rate": 4.228890767352306e-05, "loss": 36.9567, "step": 2825, "task_loss": 0.4460195302963257 }, { "compression/movement_sparsity/importance_regularization_factor": 0.3397310284182715, "compression/movement_sparsity/importance_threshold": -0.004624348786562522, "compression/movement_sparsity/linear_layer_sparsity": 0.2191508905159177, "compression/movement_sparsity/model_sparsity": 0.21162238323969962, "compression_loss": 36.52116012573242, "distillation_loss": 0.3998104929924011, "epoch": 2.39, "learning_rate": 4.2284211514980746e-05, "loss": 37.053, "step": 2826, "task_loss": 0.15969060361385345 }, { "compression/movement_sparsity/importance_regularization_factor": 0.34037123746052467, "compression/movement_sparsity/importance_threshold": -0.004619864932201477, "compression/movement_sparsity/linear_layer_sparsity": 0.21985911452848217, "compression/movement_sparsity/model_sparsity": 0.2123062775786806, "compression_loss": 36.589595794677734, "distillation_loss": 0.5855236053466797, "epoch": 2.39, "learning_rate": 4.227951535643844e-05, "loss": 37.2528, "step": 2827, "task_loss": 0.25490692257881165 }, { "compression/movement_sparsity/importance_regularization_factor": 0.34101103252992326, "compression/movement_sparsity/importance_threshold": -0.0046153839771965275, "compression/movement_sparsity/linear_layer_sparsity": 0.2205858210008822, "compression/movement_sparsity/model_sparsity": 0.21300801944814307, "compression_loss": 36.65797424316406, "distillation_loss": 0.6798850893974304, "epoch": 2.39, "learning_rate": 4.227481919789612e-05, "loss": 37.2153, "step": 2828, "task_loss": 0.4578976035118103 }, { "compression/movement_sparsity/importance_regularization_factor": 0.3416504137603522, "compression/movement_sparsity/importance_threshold": -0.004610905920609979, "compression/movement_sparsity/linear_layer_sparsity": 0.22136138078808637, "compression/movement_sparsity/model_sparsity": 0.2137569363707558, "compression_loss": 36.72627258300781, "distillation_loss": 0.8175458908081055, "epoch": 2.39, "learning_rate": 4.227012303935381e-05, "loss": 37.5156, "step": 2829, "task_loss": 1.8037407398223877 }, { "compression/movement_sparsity/importance_regularization_factor": 0.34228938128569664, "compression/movement_sparsity/importance_threshold": -0.004606430761504136, "compression/movement_sparsity/linear_layer_sparsity": 0.22223964343113822, "compression/movement_sparsity/model_sparsity": 0.2146050279901669, "compression_loss": 36.794559478759766, "distillation_loss": 0.935619592666626, "epoch": 2.39, "learning_rate": 4.22654268808115e-05, "loss": 37.5858, "step": 2830, "task_loss": 0.9907481670379639 }, { "compression/movement_sparsity/importance_regularization_factor": 0.3429279352398418, "compression/movement_sparsity/importance_threshold": -0.004601958498941303, "compression/movement_sparsity/linear_layer_sparsity": 0.22299996413210374, "compression/movement_sparsity/model_sparsity": 0.21533922933603422, "compression_loss": 36.8628044128418, "distillation_loss": 0.6764949560165405, "epoch": 2.39, "learning_rate": 4.2260730722269184e-05, "loss": 37.4213, "step": 2831, "task_loss": 0.14373371005058289 }, { "compression/movement_sparsity/importance_regularization_factor": 0.34356607575667253, "compression/movement_sparsity/importance_threshold": -0.004597489131983783, "compression/movement_sparsity/linear_layer_sparsity": 0.22376028483306928, "compression/movement_sparsity/model_sparsity": 0.21607343068190152, "compression_loss": 36.9310302734375, "distillation_loss": 0.578451931476593, "epoch": 2.39, "learning_rate": 4.225603456372688e-05, "loss": 37.7139, "step": 2832, "task_loss": 0.6982750296592712 }, { "compression/movement_sparsity/importance_regularization_factor": 0.34420380297007425, "compression/movement_sparsity/importance_threshold": -0.004593022659693881, "compression/movement_sparsity/linear_layer_sparsity": 0.2245329708958732, "compression/movement_sparsity/model_sparsity": 0.21681957260138776, "compression_loss": 36.99917221069336, "distillation_loss": 0.44185081124305725, "epoch": 2.39, "learning_rate": 4.225133840518456e-05, "loss": 37.578, "step": 2833, "task_loss": 0.9641563296318054 }, { "compression/movement_sparsity/importance_regularization_factor": 0.34484111701393205, "compression/movement_sparsity/importance_threshold": -0.004588559081133899, "compression/movement_sparsity/linear_layer_sparsity": 0.22528269101181045, "compression/movement_sparsity/model_sparsity": 0.21754353752493372, "compression_loss": 37.06732940673828, "distillation_loss": 0.5886393189430237, "epoch": 2.4, "learning_rate": 4.224664224664225e-05, "loss": 37.675, "step": 2834, "task_loss": 0.8013940453529358 }, { "compression/movement_sparsity/importance_regularization_factor": 0.3454780180221311, "compression/movement_sparsity/importance_threshold": -0.0045840983953661435, "compression/movement_sparsity/linear_layer_sparsity": 0.22605546054378783, "compression/movement_sparsity/model_sparsity": 0.2182897600461705, "compression_loss": 37.13538360595703, "distillation_loss": 0.4786885976791382, "epoch": 2.4, "learning_rate": 4.2241946088099936e-05, "loss": 37.6376, "step": 2835, "task_loss": 0.45392701029777527 }, { "compression/movement_sparsity/importance_regularization_factor": 0.3461145061285561, "compression/movement_sparsity/importance_threshold": -0.004579640601452918, "compression/movement_sparsity/linear_layer_sparsity": 0.22683406099373915, "compression/movement_sparsity/model_sparsity": 0.21904161317541085, "compression_loss": 37.20341491699219, "distillation_loss": 0.45206597447395325, "epoch": 2.4, "learning_rate": 4.223724992955763e-05, "loss": 37.7405, "step": 2836, "task_loss": 1.5867104530334473 }, { "compression/movement_sparsity/importance_regularization_factor": 0.34675058146709314, "compression/movement_sparsity/importance_threshold": -0.004575185698456524, "compression/movement_sparsity/linear_layer_sparsity": 0.22763996778757659, "compression/movement_sparsity/model_sparsity": 0.2198198345916207, "compression_loss": 37.27141571044922, "distillation_loss": 0.5896062850952148, "epoch": 2.4, "learning_rate": 4.223255377101531e-05, "loss": 37.8077, "step": 2837, "task_loss": 0.717898428440094 }, { "compression/movement_sparsity/importance_regularization_factor": 0.34738624417162633, "compression/movement_sparsity/importance_threshold": -0.004570733685439269, "compression/movement_sparsity/linear_layer_sparsity": 0.22829324523567499, "compression/movement_sparsity/model_sparsity": 0.2204506699496604, "compression_loss": 37.33934783935547, "distillation_loss": 0.5407304167747498, "epoch": 2.4, "learning_rate": 4.2227857612472995e-05, "loss": 38.1771, "step": 2838, "task_loss": 0.540482759475708 }, { "compression/movement_sparsity/importance_regularization_factor": 0.34802149437604146, "compression/movement_sparsity/importance_threshold": -0.004566284561463454, "compression/movement_sparsity/linear_layer_sparsity": 0.22905790633365997, "compression/movement_sparsity/model_sparsity": 0.22118906258655693, "compression_loss": 37.40721893310547, "distillation_loss": 0.5764235854148865, "epoch": 2.4, "learning_rate": 4.222316145393069e-05, "loss": 38.0594, "step": 2839, "task_loss": 0.8560652732849121 }, { "compression/movement_sparsity/importance_regularization_factor": 0.34865633221422343, "compression/movement_sparsity/importance_threshold": -0.004561838325591386, "compression/movement_sparsity/linear_layer_sparsity": 0.2299116528880525, "compression/movement_sparsity/model_sparsity": 0.22201348032037446, "compression_loss": 37.47505187988281, "distillation_loss": 0.9417319893836975, "epoch": 2.4, "learning_rate": 4.2218465295388375e-05, "loss": 38.2177, "step": 2840, "task_loss": 1.4386892318725586 }, { "compression/movement_sparsity/importance_regularization_factor": 0.3492907578200576, "compression/movement_sparsity/importance_threshold": -0.004557394976885365, "compression/movement_sparsity/linear_layer_sparsity": 0.23059410877435582, "compression/movement_sparsity/model_sparsity": 0.22267249174750342, "compression_loss": 37.54283142089844, "distillation_loss": 0.6872855424880981, "epoch": 2.4, "learning_rate": 4.221376913684606e-05, "loss": 38.3467, "step": 2841, "task_loss": 0.5796800851821899 }, { "compression/movement_sparsity/importance_regularization_factor": 0.34992477132742883, "compression/movement_sparsity/importance_threshold": -0.004552954514407698, "compression/movement_sparsity/linear_layer_sparsity": 0.23143056528567635, "compression/movement_sparsity/model_sparsity": 0.22348021340441887, "compression_loss": 37.610572814941406, "distillation_loss": 0.789714515209198, "epoch": 2.4, "learning_rate": 4.220907297830375e-05, "loss": 38.3226, "step": 2842, "task_loss": 0.7521236538887024 }, { "compression/movement_sparsity/importance_regularization_factor": 0.3505583728702224, "compression/movement_sparsity/importance_threshold": -0.004548516937220688, "compression/movement_sparsity/linear_layer_sparsity": 0.23222729047043417, "compression/movement_sparsity/model_sparsity": 0.2242495686280669, "compression_loss": 37.67828369140625, "distillation_loss": 0.5410398840904236, "epoch": 2.4, "learning_rate": 4.220437681976144e-05, "loss": 38.4518, "step": 2843, "task_loss": 0.43216603994369507 }, { "compression/movement_sparsity/importance_regularization_factor": 0.3511915625823233, "compression/movement_sparsity/importance_threshold": -0.00454408224438664, "compression/movement_sparsity/linear_layer_sparsity": 0.2330368222112329, "compression/movement_sparsity/model_sparsity": 0.22503129046315826, "compression_loss": 37.74591827392578, "distillation_loss": 0.5213282704353333, "epoch": 2.4, "learning_rate": 4.219968066121913e-05, "loss": 38.5246, "step": 2844, "task_loss": 0.3962607979774475 }, { "compression/movement_sparsity/importance_regularization_factor": 0.351824340597617, "compression/movement_sparsity/importance_threshold": -0.004539650434967856, "compression/movement_sparsity/linear_layer_sparsity": 0.23383051865741122, "compression/movement_sparsity/model_sparsity": 0.22579772099471448, "compression_loss": 37.81352996826172, "distillation_loss": 0.7692157030105591, "epoch": 2.4, "learning_rate": 4.219498450267681e-05, "loss": 38.4898, "step": 2845, "task_loss": 0.5137061476707458 }, { "compression/movement_sparsity/importance_regularization_factor": 0.3524567070499883, "compression/movement_sparsity/importance_threshold": -0.0045352215080266415, "compression/movement_sparsity/linear_layer_sparsity": 0.23455111995598169, "compression/movement_sparsity/model_sparsity": 0.22649356742185017, "compression_loss": 37.88108825683594, "distillation_loss": 0.41877615451812744, "epoch": 2.41, "learning_rate": 4.21902883441345e-05, "loss": 38.4618, "step": 2846, "task_loss": 0.28510376811027527 }, { "compression/movement_sparsity/importance_regularization_factor": 0.35308866207332235, "compression/movement_sparsity/importance_threshold": -0.004530795462625302, "compression/movement_sparsity/linear_layer_sparsity": 0.23518345856571152, "compression/movement_sparsity/model_sparsity": 0.22710418325503468, "compression_loss": 37.94862365722656, "distillation_loss": 0.45660996437072754, "epoch": 2.41, "learning_rate": 4.2185592185592186e-05, "loss": 38.5502, "step": 2847, "task_loss": 0.3508904278278351 }, { "compression/movement_sparsity/importance_regularization_factor": 0.35372020580150454, "compression/movement_sparsity/importance_threshold": -0.004526372297826138, "compression/movement_sparsity/linear_layer_sparsity": 0.23585833068139844, "compression/movement_sparsity/model_sparsity": 0.2277558714373983, "compression_loss": 38.016109466552734, "distillation_loss": 0.8252172470092773, "epoch": 2.41, "learning_rate": 4.218089602704988e-05, "loss": 38.8264, "step": 2848, "task_loss": 0.38833123445510864 }, { "compression/movement_sparsity/importance_regularization_factor": 0.3543513383684199, "compression/movement_sparsity/importance_threshold": -0.004521952012691455, "compression/movement_sparsity/linear_layer_sparsity": 0.23666479791111478, "compression/movement_sparsity/model_sparsity": 0.22853463403679047, "compression_loss": 38.083526611328125, "distillation_loss": 0.6967899799346924, "epoch": 2.41, "learning_rate": 4.2176199868507565e-05, "loss": 38.8926, "step": 2849, "task_loss": 0.9952340126037598 }, { "compression/movement_sparsity/importance_regularization_factor": 0.3549820599079536, "compression/movement_sparsity/importance_threshold": -0.0045175346062835565, "compression/movement_sparsity/linear_layer_sparsity": 0.23741293211275646, "compression/movement_sparsity/model_sparsity": 0.22925706752707578, "compression_loss": 38.15088653564453, "distillation_loss": 0.9019254446029663, "epoch": 2.41, "learning_rate": 4.217150370996525e-05, "loss": 38.9024, "step": 2850, "task_loss": 0.8501352667808533 }, { "compression/movement_sparsity/importance_regularization_factor": 0.35561237055399075, "compression/movement_sparsity/importance_threshold": -0.004513120077664747, "compression/movement_sparsity/linear_layer_sparsity": 0.23825929760738238, "compression/movement_sparsity/model_sparsity": 0.23007435776323648, "compression_loss": 38.21818923950195, "distillation_loss": 0.9143953919410706, "epoch": 2.41, "learning_rate": 4.216680755142294e-05, "loss": 38.8971, "step": 2851, "task_loss": 0.38667431473731995 }, { "compression/movement_sparsity/importance_regularization_factor": 0.35624227044041656, "compression/movement_sparsity/importance_threshold": -0.00450870842589733, "compression/movement_sparsity/linear_layer_sparsity": 0.23906649221132448, "compression/movement_sparsity/model_sparsity": 0.23085382274931213, "compression_loss": 38.285499572753906, "distillation_loss": 0.9725979566574097, "epoch": 2.41, "learning_rate": 4.2162111392880624e-05, "loss": 39.0366, "step": 2852, "task_loss": 0.5380443930625916 }, { "compression/movement_sparsity/importance_regularization_factor": 0.35687175970111595, "compression/movement_sparsity/importance_threshold": -0.004504299650043611, "compression/movement_sparsity/linear_layer_sparsity": 0.23979530926139608, "compression/movement_sparsity/model_sparsity": 0.23155760269161024, "compression_loss": 38.352752685546875, "distillation_loss": 0.7909529805183411, "epoch": 2.41, "learning_rate": 4.215741523433832e-05, "loss": 39.1394, "step": 2853, "task_loss": 0.7013994455337524 }, { "compression/movement_sparsity/importance_regularization_factor": 0.35750083846997427, "compression/movement_sparsity/importance_threshold": -0.004499893749165892, "compression/movement_sparsity/linear_layer_sparsity": 0.24057822626003156, "compression/movement_sparsity/model_sparsity": 0.2323136240828082, "compression_loss": 38.41997528076172, "distillation_loss": 0.6894046068191528, "epoch": 2.41, "learning_rate": 4.2152719075796e-05, "loss": 39.0867, "step": 2854, "task_loss": 0.43048420548439026 }, { "compression/movement_sparsity/importance_regularization_factor": 0.35812950688087664, "compression/movement_sparsity/importance_threshold": -0.0044954907223264794, "compression/movement_sparsity/linear_layer_sparsity": 0.24128690339096617, "compression/movement_sparsity/model_sparsity": 0.23299795597414935, "compression_loss": 38.487152099609375, "distillation_loss": 0.8894556760787964, "epoch": 2.41, "learning_rate": 4.214802291725369e-05, "loss": 39.3172, "step": 2855, "task_loss": 1.365706205368042 }, { "compression/movement_sparsity/importance_regularization_factor": 0.35875776506770773, "compression/movement_sparsity/importance_threshold": -0.004491090568587677, "compression/movement_sparsity/linear_layer_sparsity": 0.2420775830227326, "compression/movement_sparsity/model_sparsity": 0.23376147332814956, "compression_loss": 38.554290771484375, "distillation_loss": 0.36945828795433044, "epoch": 2.41, "learning_rate": 4.2143326758711376e-05, "loss": 39.319, "step": 2856, "task_loss": 0.11058413982391357 }, { "compression/movement_sparsity/importance_regularization_factor": 0.3593856131643538, "compression/movement_sparsity/importance_threshold": -0.004486693287011782, "compression/movement_sparsity/linear_layer_sparsity": 0.24279491709937087, "compression/movement_sparsity/model_sparsity": 0.23445416477247755, "compression_loss": 38.62138748168945, "distillation_loss": 1.327958106994629, "epoch": 2.41, "learning_rate": 4.213863060016906e-05, "loss": 39.6254, "step": 2857, "task_loss": 1.2029945850372314 }, { "compression/movement_sparsity/importance_regularization_factor": 0.36001305130469874, "compression/movement_sparsity/importance_threshold": -0.004482298876661109, "compression/movement_sparsity/linear_layer_sparsity": 0.24352433035782423, "compression/movement_sparsity/model_sparsity": 0.2351585204415654, "compression_loss": 38.688438415527344, "distillation_loss": 0.4099147915840149, "epoch": 2.42, "learning_rate": 4.213393444162675e-05, "loss": 39.3491, "step": 2858, "task_loss": 0.821457028388977 }, { "compression/movement_sparsity/importance_regularization_factor": 0.36064007962262845, "compression/movement_sparsity/importance_threshold": -0.004477907336597956, "compression/movement_sparsity/linear_layer_sparsity": 0.24410912731118986, "compression/movement_sparsity/model_sparsity": 0.23572322782053706, "compression_loss": 38.75544738769531, "distillation_loss": 0.3898249864578247, "epoch": 2.42, "learning_rate": 4.2129238283084435e-05, "loss": 39.5784, "step": 2859, "task_loss": 0.44446590542793274 }, { "compression/movement_sparsity/importance_regularization_factor": 0.36126669825202784, "compression/movement_sparsity/importance_threshold": -0.004473518665884626, "compression/movement_sparsity/linear_layer_sparsity": 0.2450224350829235, "compression/movement_sparsity/model_sparsity": 0.23660516066064832, "compression_loss": 38.822410583496094, "distillation_loss": 0.4820502996444702, "epoch": 2.42, "learning_rate": 4.212454212454213e-05, "loss": 39.4989, "step": 2860, "task_loss": 0.7827270030975342 }, { "compression/movement_sparsity/importance_regularization_factor": 0.36189290732678214, "compression/movement_sparsity/importance_threshold": -0.004469132863583426, "compression/movement_sparsity/linear_layer_sparsity": 0.24563429989682256, "compression/movement_sparsity/model_sparsity": 0.2371960060358736, "compression_loss": 38.88930130004883, "distillation_loss": 0.8735820055007935, "epoch": 2.42, "learning_rate": 4.2119845965999815e-05, "loss": 39.6787, "step": 2861, "task_loss": 1.5371885299682617 }, { "compression/movement_sparsity/importance_regularization_factor": 0.3625187069807764, "compression/movement_sparsity/importance_threshold": -0.004464749928756659, "compression/movement_sparsity/linear_layer_sparsity": 0.246373240565217, "compression/movement_sparsity/model_sparsity": 0.2379095618190613, "compression_loss": 38.956138610839844, "distillation_loss": 1.015181541442871, "epoch": 2.42, "learning_rate": 4.21151498074575e-05, "loss": 39.8934, "step": 2862, "task_loss": 1.1540307998657227 }, { "compression/movement_sparsity/importance_regularization_factor": 0.36314409734789577, "compression/movement_sparsity/importance_threshold": -0.004460369860466627, "compression/movement_sparsity/linear_layer_sparsity": 0.2471150668821793, "compression/movement_sparsity/model_sparsity": 0.23862590411991128, "compression_loss": 39.02297592163086, "distillation_loss": 0.7358248829841614, "epoch": 2.42, "learning_rate": 4.211045364891519e-05, "loss": 39.845, "step": 2863, "task_loss": 0.4448283314704895 }, { "compression/movement_sparsity/importance_regularization_factor": 0.36376907856202545, "compression/movement_sparsity/importance_threshold": -0.004455992657775637, "compression/movement_sparsity/linear_layer_sparsity": 0.24789999906514526, "compression/movement_sparsity/model_sparsity": 0.23938387146765852, "compression_loss": 39.089725494384766, "distillation_loss": 0.5005629062652588, "epoch": 2.42, "learning_rate": 4.2105757490372874e-05, "loss": 39.6118, "step": 2864, "task_loss": 0.2224845290184021 }, { "compression/movement_sparsity/importance_regularization_factor": 0.36439365075705055, "compression/movement_sparsity/importance_threshold": -0.004451618319745991, "compression/movement_sparsity/linear_layer_sparsity": 0.24883126463333852, "compression/movement_sparsity/model_sparsity": 0.24028314519867636, "compression_loss": 39.15641403198242, "distillation_loss": 0.39794421195983887, "epoch": 2.42, "learning_rate": 4.210106133183057e-05, "loss": 39.7211, "step": 2865, "task_loss": 0.3817918002605438 }, { "compression/movement_sparsity/importance_regularization_factor": 0.36501781406685607, "compression/movement_sparsity/importance_threshold": -0.004447246845439996, "compression/movement_sparsity/linear_layer_sparsity": 0.2496526489967673, "compression/movement_sparsity/model_sparsity": 0.2410763124823475, "compression_loss": 39.22306442260742, "distillation_loss": 0.7675390243530273, "epoch": 2.42, "learning_rate": 4.209636517328825e-05, "loss": 39.9514, "step": 2866, "task_loss": 1.4659264087677002 }, { "compression/movement_sparsity/importance_regularization_factor": 0.36564156862532715, "compression/movement_sparsity/importance_threshold": -0.004442878233919953, "compression/movement_sparsity/linear_layer_sparsity": 0.25043907170068774, "compression/movement_sparsity/model_sparsity": 0.24183571914706906, "compression_loss": 39.2896728515625, "distillation_loss": 0.4020206034183502, "epoch": 2.42, "learning_rate": 4.209166901474594e-05, "loss": 39.8924, "step": 2867, "task_loss": 0.13939334452152252 }, { "compression/movement_sparsity/importance_regularization_factor": 0.36626491456634935, "compression/movement_sparsity/importance_threshold": -0.0044385124842481645, "compression/movement_sparsity/linear_layer_sparsity": 0.25119482544544874, "compression/movement_sparsity/model_sparsity": 0.24256551042572705, "compression_loss": 39.35624694824219, "distillation_loss": 0.7531265616416931, "epoch": 2.42, "learning_rate": 4.2086972856203626e-05, "loss": 40.1112, "step": 2868, "task_loss": 0.6136288046836853 }, { "compression/movement_sparsity/importance_regularization_factor": 0.36688785202380747, "compression/movement_sparsity/importance_threshold": -0.004434149595486938, "compression/movement_sparsity/linear_layer_sparsity": 0.25192741053249323, "compression/movement_sparsity/model_sparsity": 0.24327292896133626, "compression_loss": 39.422752380371094, "distillation_loss": 0.5470821857452393, "epoch": 2.42, "learning_rate": 4.208227669766132e-05, "loss": 40.2009, "step": 2869, "task_loss": 0.7233137488365173 }, { "compression/movement_sparsity/importance_regularization_factor": 0.3675103811315866, "compression/movement_sparsity/importance_threshold": -0.004429789566698576, "compression/movement_sparsity/linear_layer_sparsity": 0.2528071636964996, "compression/movement_sparsity/model_sparsity": 0.2441224598977217, "compression_loss": 39.48924255371094, "distillation_loss": 0.580055832862854, "epoch": 2.43, "learning_rate": 4.2077580539119006e-05, "loss": 40.1623, "step": 2870, "task_loss": 0.3591819405555725 }, { "compression/movement_sparsity/importance_regularization_factor": 0.3681325020235722, "compression/movement_sparsity/importance_threshold": -0.004425432396945382, "compression/movement_sparsity/linear_layer_sparsity": 0.25380409565586354, "compression/movement_sparsity/model_sparsity": 0.24508514417736005, "compression_loss": 39.55565643310547, "distillation_loss": 0.6337827444076538, "epoch": 2.43, "learning_rate": 4.2072884380576685e-05, "loss": 40.4344, "step": 2871, "task_loss": 0.5271090865135193 }, { "compression/movement_sparsity/importance_regularization_factor": 0.368754214833649, "compression/movement_sparsity/importance_threshold": -0.004421078085289661, "compression/movement_sparsity/linear_layer_sparsity": 0.2546627072706515, "compression/movement_sparsity/model_sparsity": 0.24591425984178175, "compression_loss": 39.62202072143555, "distillation_loss": 1.0839588642120361, "epoch": 2.43, "learning_rate": 4.206818822203438e-05, "loss": 40.3394, "step": 2872, "task_loss": 0.9516077637672424 }, { "compression/movement_sparsity/importance_regularization_factor": 0.36937551969570237, "compression/movement_sparsity/importance_threshold": -0.004416726630793717, "compression/movement_sparsity/linear_layer_sparsity": 0.25527959215912527, "compression/movement_sparsity/model_sparsity": 0.24650995283657653, "compression_loss": 39.6883544921875, "distillation_loss": 0.46678072214126587, "epoch": 2.43, "learning_rate": 4.2063492063492065e-05, "loss": 40.1666, "step": 2873, "task_loss": 0.27631038427352905 }, { "compression/movement_sparsity/importance_regularization_factor": 0.3699964167436175, "compression/movement_sparsity/importance_threshold": -0.004412378032519852, "compression/movement_sparsity/linear_layer_sparsity": 0.2560888019473978, "compression/movement_sparsity/model_sparsity": 0.24729136377920144, "compression_loss": 39.75463104248047, "distillation_loss": 0.5165824890136719, "epoch": 2.43, "learning_rate": 4.205879590494976e-05, "loss": 40.4784, "step": 2874, "task_loss": 0.4173288345336914 }, { "compression/movement_sparsity/importance_regularization_factor": 0.3706169061112794, "compression/movement_sparsity/importance_threshold": -0.004408032289530373, "compression/movement_sparsity/linear_layer_sparsity": 0.25687739484982797, "compression/movement_sparsity/model_sparsity": 0.2480528660894376, "compression_loss": 39.82091522216797, "distillation_loss": 0.9371733665466309, "epoch": 2.43, "learning_rate": 4.205409974640744e-05, "loss": 40.69, "step": 2875, "task_loss": 1.7548267841339111 }, { "compression/movement_sparsity/importance_regularization_factor": 0.371236987932573, "compression/movement_sparsity/importance_threshold": -0.004403689400887583, "compression/movement_sparsity/linear_layer_sparsity": 0.25749401740661376, "compression/movement_sparsity/model_sparsity": 0.2486483057644449, "compression_loss": 39.88713836669922, "distillation_loss": 0.9543690085411072, "epoch": 2.43, "learning_rate": 4.204940358786513e-05, "loss": 40.6556, "step": 2876, "task_loss": 1.0020779371261597 }, { "compression/movement_sparsity/importance_regularization_factor": 0.37185666234138415, "compression/movement_sparsity/importance_threshold": -0.004399349365653784, "compression/movement_sparsity/linear_layer_sparsity": 0.2582421158357525, "compression/movement_sparsity/model_sparsity": 0.2493707047111228, "compression_loss": 39.95332336425781, "distillation_loss": 0.8314875960350037, "epoch": 2.43, "learning_rate": 4.204470742932282e-05, "loss": 40.5892, "step": 2877, "task_loss": 0.8611935973167419 }, { "compression/movement_sparsity/importance_regularization_factor": 0.37247592947159713, "compression/movement_sparsity/importance_threshold": -0.004395012182891282, "compression/movement_sparsity/linear_layer_sparsity": 0.2591518105846925, "compression/movement_sparsity/model_sparsity": 0.2502491486468883, "compression_loss": 40.019466400146484, "distillation_loss": 0.7959657311439514, "epoch": 2.43, "learning_rate": 4.20400112707805e-05, "loss": 40.7932, "step": 2878, "task_loss": 0.7210441827774048 }, { "compression/movement_sparsity/importance_regularization_factor": 0.3730947894570976, "compression/movement_sparsity/importance_threshold": -0.00439067785166238, "compression/movement_sparsity/linear_layer_sparsity": 0.2598087606764227, "compression/movement_sparsity/model_sparsity": 0.2508835304819528, "compression_loss": 40.085548400878906, "distillation_loss": 0.7294604778289795, "epoch": 2.43, "learning_rate": 4.2035315112238196e-05, "loss": 40.7666, "step": 2879, "task_loss": 0.4791008532047272 }, { "compression/movement_sparsity/importance_regularization_factor": 0.37371324243177073, "compression/movement_sparsity/importance_threshold": -0.004386346371029384, "compression/movement_sparsity/linear_layer_sparsity": 0.2606237894585016, "compression/movement_sparsity/model_sparsity": 0.2516705605180454, "compression_loss": 40.1515998840332, "distillation_loss": 1.1352918148040771, "epoch": 2.43, "learning_rate": 4.2030618953695876e-05, "loss": 40.9431, "step": 2880, "task_loss": 0.9499437808990479 }, { "compression/movement_sparsity/importance_regularization_factor": 0.3743312885295015, "compression/movement_sparsity/importance_threshold": -0.004382017740054594, "compression/movement_sparsity/linear_layer_sparsity": 0.26126811185670545, "compression/movement_sparsity/model_sparsity": 0.25229274845970345, "compression_loss": 40.217594146728516, "distillation_loss": 0.9484983682632446, "epoch": 2.44, "learning_rate": 4.202592279515357e-05, "loss": 40.8706, "step": 2881, "task_loss": 0.4032846689224243 }, { "compression/movement_sparsity/importance_regularization_factor": 0.374948927884175, "compression/movement_sparsity/importance_threshold": -0.004377691957800318, "compression/movement_sparsity/linear_layer_sparsity": 0.2619765743526226, "compression/movement_sparsity/model_sparsity": 0.25297687308940026, "compression_loss": 40.28354263305664, "distillation_loss": 1.3353146314620972, "epoch": 2.44, "learning_rate": 4.2021226636611255e-05, "loss": 41.0718, "step": 2882, "task_loss": 0.9825502634048462 }, { "compression/movement_sparsity/importance_regularization_factor": 0.3755661606296764, "compression/movement_sparsity/importance_threshold": -0.004373369023328857, "compression/movement_sparsity/linear_layer_sparsity": 0.26255964230168105, "compression/movement_sparsity/model_sparsity": 0.2535399108606817, "compression_loss": 40.349483489990234, "distillation_loss": 0.6703431606292725, "epoch": 2.44, "learning_rate": 4.201653047806894e-05, "loss": 41.0673, "step": 2883, "task_loss": 0.49749696254730225 }, { "compression/movement_sparsity/importance_regularization_factor": 0.37618298689989105, "compression/movement_sparsity/importance_threshold": -0.004369048935702516, "compression/movement_sparsity/linear_layer_sparsity": 0.2634891788655671, "compression/movement_sparsity/model_sparsity": 0.25443751498400935, "compression_loss": 40.41535568237305, "distillation_loss": 0.7440531253814697, "epoch": 2.44, "learning_rate": 4.201183431952663e-05, "loss": 41.077, "step": 2884, "task_loss": 0.36776742339134216 }, { "compression/movement_sparsity/importance_regularization_factor": 0.37679940682870383, "compression/movement_sparsity/importance_threshold": -0.0043647316939836, "compression/movement_sparsity/linear_layer_sparsity": 0.26421934334658154, "compression/movement_sparsity/model_sparsity": 0.25514259606885226, "compression_loss": 40.48122024536133, "distillation_loss": 0.9638993740081787, "epoch": 2.44, "learning_rate": 4.2007138160984314e-05, "loss": 41.3331, "step": 2885, "task_loss": 1.150522232055664 }, { "compression/movement_sparsity/importance_regularization_factor": 0.3774154205499999, "compression/movement_sparsity/importance_threshold": -0.004360417297234413, "compression/movement_sparsity/linear_layer_sparsity": 0.26499822997655614, "compression/movement_sparsity/model_sparsity": 0.2558947255469517, "compression_loss": 40.547027587890625, "distillation_loss": 0.731335461139679, "epoch": 2.44, "learning_rate": 4.200244200244201e-05, "loss": 41.4364, "step": 2886, "task_loss": 1.0942035913467407 }, { "compression/movement_sparsity/importance_regularization_factor": 0.3780310281976643, "compression/movement_sparsity/importance_threshold": -0.004356105744517259, "compression/movement_sparsity/linear_layer_sparsity": 0.2657790363975201, "compression/movement_sparsity/model_sparsity": 0.256648708865314, "compression_loss": 40.61281204223633, "distillation_loss": 0.544540524482727, "epoch": 2.44, "learning_rate": 4.1997745843899694e-05, "loss": 41.3138, "step": 2887, "task_loss": 0.4676317572593689 }, { "compression/movement_sparsity/importance_regularization_factor": 0.3786462299055826, "compression/movement_sparsity/importance_threshold": -0.004351797034894441, "compression/movement_sparsity/linear_layer_sparsity": 0.26668183897756653, "compression/movement_sparsity/model_sparsity": 0.25752049739939026, "compression_loss": 40.678524017333984, "distillation_loss": 0.623945951461792, "epoch": 2.44, "learning_rate": 4.199304968535738e-05, "loss": 41.5715, "step": 2888, "task_loss": 0.7430536150932312 }, { "compression/movement_sparsity/importance_regularization_factor": 0.37926102580763965, "compression/movement_sparsity/importance_threshold": -0.0043474911674282616, "compression/movement_sparsity/linear_layer_sparsity": 0.26752262396173887, "compression/movement_sparsity/model_sparsity": 0.2583323988327991, "compression_loss": 40.74421691894531, "distillation_loss": 0.598175048828125, "epoch": 2.44, "learning_rate": 4.1988353526815066e-05, "loss": 41.4824, "step": 2889, "task_loss": 0.9430882334709167 }, { "compression/movement_sparsity/importance_regularization_factor": 0.3798754160377207, "compression/movement_sparsity/importance_threshold": -0.0043431881411810265, "compression/movement_sparsity/linear_layer_sparsity": 0.2681773800066241, "compression/movement_sparsity/model_sparsity": 0.2589646619932774, "compression_loss": 40.80987548828125, "distillation_loss": 0.5698367953300476, "epoch": 2.44, "learning_rate": 4.198365736827275e-05, "loss": 41.6767, "step": 2890, "task_loss": 0.29367557168006897 }, { "compression/movement_sparsity/importance_regularization_factor": 0.3804894007297107, "compression/movement_sparsity/importance_threshold": -0.00433888795521504, "compression/movement_sparsity/linear_layer_sparsity": 0.2688874999618427, "compression/movement_sparsity/model_sparsity": 0.2596503871434497, "compression_loss": 40.87548065185547, "distillation_loss": 0.5239613056182861, "epoch": 2.44, "learning_rate": 4.1978961209730446e-05, "loss": 41.5483, "step": 2891, "task_loss": 0.12229099869728088 }, { "compression/movement_sparsity/importance_regularization_factor": 0.381102980017495, "compression/movement_sparsity/importance_threshold": -0.004334590608592605, "compression/movement_sparsity/linear_layer_sparsity": 0.2696505155246939, "compression/movement_sparsity/model_sparsity": 0.26038719077440653, "compression_loss": 40.9410400390625, "distillation_loss": 0.6010409593582153, "epoch": 2.44, "learning_rate": 4.1974265051188125e-05, "loss": 41.6898, "step": 2892, "task_loss": 0.6402194499969482 }, { "compression/movement_sparsity/importance_regularization_factor": 0.3817161540349585, "compression/movement_sparsity/importance_threshold": -0.004330296100376028, "compression/movement_sparsity/linear_layer_sparsity": 0.27049797804274234, "compression/movement_sparsity/model_sparsity": 0.26120554034786037, "compression_loss": 41.00656509399414, "distillation_loss": 0.649442732334137, "epoch": 2.45, "learning_rate": 4.196956889264582e-05, "loss": 41.7127, "step": 2893, "task_loss": 0.5247517228126526 }, { "compression/movement_sparsity/importance_regularization_factor": 0.38232892291598664, "compression/movement_sparsity/importance_threshold": -0.004326004429627609, "compression/movement_sparsity/linear_layer_sparsity": 0.27119722315707695, "compression/movement_sparsity/model_sparsity": 0.261880764241388, "compression_loss": 41.07201385498047, "distillation_loss": 0.6194124817848206, "epoch": 2.45, "learning_rate": 4.1964872734103505e-05, "loss": 41.7012, "step": 2894, "task_loss": 0.9719234704971313 }, { "compression/movement_sparsity/importance_regularization_factor": 0.3829412867944644, "compression/movement_sparsity/importance_threshold": -0.004321715595409654, "compression/movement_sparsity/linear_layer_sparsity": 0.2718854145680132, "compression/movement_sparsity/model_sparsity": 0.26254531416023413, "compression_loss": 41.137474060058594, "distillation_loss": 1.0065691471099854, "epoch": 2.45, "learning_rate": 4.196017657556119e-05, "loss": 41.9329, "step": 2895, "task_loss": 0.5961303114891052 }, { "compression/movement_sparsity/importance_regularization_factor": 0.38355324580427663, "compression/movement_sparsity/importance_threshold": -0.00431742959678447, "compression/movement_sparsity/linear_layer_sparsity": 0.2725802835128254, "compression/movement_sparsity/model_sparsity": 0.2632163122191252, "compression_loss": 41.20283889770508, "distillation_loss": 0.47348302602767944, "epoch": 2.45, "learning_rate": 4.1955480417018884e-05, "loss": 42.0792, "step": 2896, "task_loss": 0.5711849927902222 }, { "compression/movement_sparsity/importance_regularization_factor": 0.3841648000793093, "compression/movement_sparsity/importance_threshold": -0.004313146432814354, "compression/movement_sparsity/linear_layer_sparsity": 0.2733708319787479, "compression/movement_sparsity/model_sparsity": 0.2639797029132317, "compression_loss": 41.26814270019531, "distillation_loss": 0.5595443248748779, "epoch": 2.45, "learning_rate": 4.1950784258476564e-05, "loss": 42.0867, "step": 2897, "task_loss": 0.8683832883834839 }, { "compression/movement_sparsity/importance_regularization_factor": 0.3847759497534464, "compression/movement_sparsity/importance_threshold": -0.004308866102561619, "compression/movement_sparsity/linear_layer_sparsity": 0.2741195623887714, "compression/movement_sparsity/model_sparsity": 0.26470271213030666, "compression_loss": 41.33340835571289, "distillation_loss": 1.0132503509521484, "epoch": 2.45, "learning_rate": 4.194608809993426e-05, "loss": 42.2158, "step": 2898, "task_loss": 0.9445706605911255 }, { "compression/movement_sparsity/importance_regularization_factor": 0.3853866949605741, "compression/movement_sparsity/importance_threshold": -0.00430458860508856, "compression/movement_sparsity/linear_layer_sparsity": 0.2749316578256118, "compression/movement_sparsity/model_sparsity": 0.2654869095905939, "compression_loss": 41.39863586425781, "distillation_loss": 1.0065538883209229, "epoch": 2.45, "learning_rate": 4.194139194139194e-05, "loss": 42.1357, "step": 2899, "task_loss": 0.25425082445144653 }, { "compression/movement_sparsity/importance_regularization_factor": 0.385997035834577, "compression/movement_sparsity/importance_threshold": -0.004300313939457486, "compression/movement_sparsity/linear_layer_sparsity": 0.27566486296937337, "compression/movement_sparsity/model_sparsity": 0.2661949268820644, "compression_loss": 41.463802337646484, "distillation_loss": 0.9346264600753784, "epoch": 2.45, "learning_rate": 4.1936695782849636e-05, "loss": 42.4009, "step": 2900, "task_loss": 1.6883752346038818 }, { "compression/movement_sparsity/importance_regularization_factor": 0.3866069725093405, "compression/movement_sparsity/importance_threshold": -0.0042960421047307, "compression/movement_sparsity/linear_layer_sparsity": 0.2763852615570941, "compression/movement_sparsity/model_sparsity": 0.2668905775620916, "compression_loss": 41.528907775878906, "distillation_loss": 0.5689643621444702, "epoch": 2.45, "learning_rate": 4.1931999624307316e-05, "loss": 42.4614, "step": 2901, "task_loss": 1.0131381750106812 }, { "compression/movement_sparsity/importance_regularization_factor": 0.3872165051187495, "compression/movement_sparsity/importance_threshold": -0.004291773099970505, "compression/movement_sparsity/linear_layer_sparsity": 0.27716716500148053, "compression/movement_sparsity/model_sparsity": 0.267645620217747, "compression_loss": 41.59398651123047, "distillation_loss": 1.53434157371521, "epoch": 2.45, "learning_rate": 4.1927303465765e-05, "loss": 42.6286, "step": 2902, "task_loss": 1.245392084121704 }, { "compression/movement_sparsity/importance_regularization_factor": 0.38782563379668933, "compression/movement_sparsity/importance_threshold": -0.004287506924239207, "compression/movement_sparsity/linear_layer_sparsity": 0.27790628453238947, "compression/movement_sparsity/model_sparsity": 0.2683593487189716, "compression_loss": 41.65904235839844, "distillation_loss": 0.7506531476974487, "epoch": 2.45, "learning_rate": 4.1922607307222695e-05, "loss": 42.5519, "step": 2903, "task_loss": 0.9118168950080872 }, { "compression/movement_sparsity/importance_regularization_factor": 0.388434358677045, "compression/movement_sparsity/importance_threshold": -0.004283243576599108, "compression/movement_sparsity/linear_layer_sparsity": 0.2787032601246677, "compression/movement_sparsity/model_sparsity": 0.26912894574787133, "compression_loss": 41.724021911621094, "distillation_loss": 0.6856245994567871, "epoch": 2.45, "learning_rate": 4.191791114868038e-05, "loss": 42.5667, "step": 2904, "task_loss": 1.7196595668792725 }, { "compression/movement_sparsity/importance_regularization_factor": 0.3890426798937018, "compression/movement_sparsity/importance_threshold": -0.004278983056112513, "compression/movement_sparsity/linear_layer_sparsity": 0.2794917934062596, "compression/movement_sparsity/model_sparsity": 0.2698903904854285, "compression_loss": 41.78897476196289, "distillation_loss": 0.47569721937179565, "epoch": 2.46, "learning_rate": 4.191321499013807e-05, "loss": 42.3231, "step": 2905, "task_loss": 1.086851954460144 }, { "compression/movement_sparsity/importance_regularization_factor": 0.38965059758054466, "compression/movement_sparsity/importance_threshold": -0.004274725361841727, "compression/movement_sparsity/linear_layer_sparsity": 0.28022291182068493, "compression/movement_sparsity/model_sparsity": 0.270596392733135, "compression_loss": 41.8538932800293, "distillation_loss": 0.693184494972229, "epoch": 2.46, "learning_rate": 4.1908518831595754e-05, "loss": 42.5815, "step": 2906, "task_loss": 0.4588662385940552 }, { "compression/movement_sparsity/importance_regularization_factor": 0.39025811187145865, "compression/movement_sparsity/importance_threshold": -0.004270470492849053, "compression/movement_sparsity/linear_layer_sparsity": 0.28104732492269324, "compression/movement_sparsity/model_sparsity": 0.271392484708898, "compression_loss": 41.91870880126953, "distillation_loss": 0.7055269479751587, "epoch": 2.46, "learning_rate": 4.190382267305345e-05, "loss": 42.6951, "step": 2907, "task_loss": 0.9223252534866333 }, { "compression/movement_sparsity/importance_regularization_factor": 0.3908652229003293, "compression/movement_sparsity/importance_threshold": -0.0042662184481967935, "compression/movement_sparsity/linear_layer_sparsity": 0.2818362517018172, "compression/movement_sparsity/model_sparsity": 0.2721543094261364, "compression_loss": 41.983482360839844, "distillation_loss": 0.6424552202224731, "epoch": 2.46, "learning_rate": 4.1899126514511134e-05, "loss": 42.7729, "step": 2908, "task_loss": 1.3742283582687378 }, { "compression/movement_sparsity/importance_regularization_factor": 0.3914719308010416, "compression/movement_sparsity/importance_threshold": -0.0042619692269472535, "compression/movement_sparsity/linear_layer_sparsity": 0.28264981381127685, "compression/movement_sparsity/model_sparsity": 0.2729399231743263, "compression_loss": 42.0482292175293, "distillation_loss": 0.6112089157104492, "epoch": 2.46, "learning_rate": 4.189443035596882e-05, "loss": 42.9139, "step": 2909, "task_loss": 1.4950002431869507 }, { "compression/movement_sparsity/importance_regularization_factor": 0.39207823570748057, "compression/movement_sparsity/importance_threshold": -0.004257722828162738, "compression/movement_sparsity/linear_layer_sparsity": 0.2834723548189663, "compression/movement_sparsity/model_sparsity": 0.27373420736796955, "compression_loss": 42.11294937133789, "distillation_loss": 1.268035650253296, "epoch": 2.46, "learning_rate": 4.1889734197426507e-05, "loss": 43.043, "step": 2910, "task_loss": 1.302301049232483 }, { "compression/movement_sparsity/importance_regularization_factor": 0.3926841377535315, "compression/movement_sparsity/importance_threshold": -0.004253479250905551, "compression/movement_sparsity/linear_layer_sparsity": 0.28428825406528263, "compression/movement_sparsity/model_sparsity": 0.2745220779651752, "compression_loss": 42.17762756347656, "distillation_loss": 0.5152312517166138, "epoch": 2.46, "learning_rate": 4.188503803888419e-05, "loss": 42.8772, "step": 2911, "task_loss": 0.4991191625595093 }, { "compression/movement_sparsity/importance_regularization_factor": 0.3932896370730794, "compression/movement_sparsity/importance_threshold": -0.004249238494237994, "compression/movement_sparsity/linear_layer_sparsity": 0.28499360435344684, "compression/movement_sparsity/model_sparsity": 0.27520319730102966, "compression_loss": 42.242218017578125, "distillation_loss": 0.2787705063819885, "epoch": 2.46, "learning_rate": 4.1880341880341886e-05, "loss": 42.943, "step": 2912, "task_loss": 0.1897142082452774 }, { "compression/movement_sparsity/importance_regularization_factor": 0.3938947338000094, "compression/movement_sparsity/importance_threshold": -0.004245000557222374, "compression/movement_sparsity/linear_layer_sparsity": 0.28569287331611676, "compression/movement_sparsity/model_sparsity": 0.2758784442236289, "compression_loss": 42.306766510009766, "distillation_loss": 1.042336106300354, "epoch": 2.46, "learning_rate": 4.187564572179957e-05, "loss": 43.1683, "step": 2913, "task_loss": 0.349286824464798 }, { "compression/movement_sparsity/importance_regularization_factor": 0.3944994280682067, "compression/movement_sparsity/importance_threshold": -0.004240765438920993, "compression/movement_sparsity/linear_layer_sparsity": 0.2865569938963525, "compression/movement_sparsity/model_sparsity": 0.2767128796035877, "compression_loss": 42.371299743652344, "distillation_loss": 0.5994269847869873, "epoch": 2.46, "learning_rate": 4.187094956325726e-05, "loss": 43.3492, "step": 2914, "task_loss": 1.0650286674499512 }, { "compression/movement_sparsity/importance_regularization_factor": 0.3951037200115566, "compression/movement_sparsity/importance_threshold": -0.0042365331383961554, "compression/movement_sparsity/linear_layer_sparsity": 0.28742491828606404, "compression/movement_sparsity/model_sparsity": 0.2775509881204649, "compression_loss": 42.43577194213867, "distillation_loss": 0.5641458034515381, "epoch": 2.46, "learning_rate": 4.1866253404714945e-05, "loss": 43.0292, "step": 2915, "task_loss": 0.42542505264282227 }, { "compression/movement_sparsity/importance_regularization_factor": 0.3957076097639439, "compression/movement_sparsity/importance_threshold": -0.004232303654710166, "compression/movement_sparsity/linear_layer_sparsity": 0.28816385895445845, "compression/movement_sparsity/model_sparsity": 0.27826454390365263, "compression_loss": 42.50020980834961, "distillation_loss": 0.6959583759307861, "epoch": 2.46, "learning_rate": 4.186155724617263e-05, "loss": 43.1865, "step": 2916, "task_loss": 2.0493392944335938 }, { "compression/movement_sparsity/importance_regularization_factor": 0.39631109745925397, "compression/movement_sparsity/importance_threshold": -0.004228076986925329, "compression/movement_sparsity/linear_layer_sparsity": 0.28891790754324753, "compression/movement_sparsity/model_sparsity": 0.27899268860369203, "compression_loss": 42.56462478637695, "distillation_loss": 0.5928775668144226, "epoch": 2.47, "learning_rate": 4.1856861087630324e-05, "loss": 43.3091, "step": 2917, "task_loss": 0.6065967679023743 }, { "compression/movement_sparsity/importance_regularization_factor": 0.39691418323137173, "compression/movement_sparsity/importance_threshold": -0.004223853134103946, "compression/movement_sparsity/linear_layer_sparsity": 0.2898188380289752, "compression/movement_sparsity/model_sparsity": 0.2798626693556485, "compression_loss": 42.628990173339844, "distillation_loss": 0.8544743061065674, "epoch": 2.47, "learning_rate": 4.1852164929088004e-05, "loss": 43.5233, "step": 2918, "task_loss": 0.3194716274738312 }, { "compression/movement_sparsity/importance_regularization_factor": 0.39751686721418267, "compression/movement_sparsity/importance_threshold": -0.004219632095308325, "compression/movement_sparsity/linear_layer_sparsity": 0.29068480685519443, "compression/movement_sparsity/model_sparsity": 0.28069888948865546, "compression_loss": 42.69329833984375, "distillation_loss": 0.6866007447242737, "epoch": 2.47, "learning_rate": 4.18474687705457e-05, "loss": 43.7207, "step": 2919, "task_loss": 0.3574233949184418 }, { "compression/movement_sparsity/importance_regularization_factor": 0.3981191495415718, "compression/movement_sparsity/importance_threshold": -0.004215413869600764, "compression/movement_sparsity/linear_layer_sparsity": 0.2913518924894152, "compression/movement_sparsity/model_sparsity": 0.2813430586791453, "compression_loss": 42.75755310058594, "distillation_loss": 1.5404736995697021, "epoch": 2.47, "learning_rate": 4.1842772612003383e-05, "loss": 43.6753, "step": 2920, "task_loss": 0.2516445517539978 }, { "compression/movement_sparsity/importance_regularization_factor": 0.39872103034742423, "compression/movement_sparsity/importance_threshold": -0.004211198456043572, "compression/movement_sparsity/linear_layer_sparsity": 0.29219445417456524, "compression/movement_sparsity/model_sparsity": 0.2821566757783875, "compression_loss": 42.82181167602539, "distillation_loss": 0.9382905960083008, "epoch": 2.47, "learning_rate": 4.183807645346107e-05, "loss": 43.8622, "step": 2921, "task_loss": 0.8016681671142578 }, { "compression/movement_sparsity/importance_regularization_factor": 0.399322509765625, "compression/movement_sparsity/importance_threshold": -0.004206985853699052, "compression/movement_sparsity/linear_layer_sparsity": 0.2929272896691301, "compression/movement_sparsity/model_sparsity": 0.2828643361192484, "compression_loss": 42.88600158691406, "distillation_loss": 1.0830966234207153, "epoch": 2.47, "learning_rate": 4.1833380294918756e-05, "loss": 43.8048, "step": 2922, "task_loss": 1.0063364505767822 }, { "compression/movement_sparsity/importance_regularization_factor": 0.39992358793005933, "compression/movement_sparsity/importance_threshold": -0.004202776061629507, "compression/movement_sparsity/linear_layer_sparsity": 0.2936662064891893, "compression/movement_sparsity/model_sparsity": 0.2835778688733645, "compression_loss": 42.95016098022461, "distillation_loss": 0.9263947010040283, "epoch": 2.47, "learning_rate": 4.182868413637644e-05, "loss": 43.8085, "step": 2923, "task_loss": 1.1111057996749878 }, { "compression/movement_sparsity/importance_regularization_factor": 0.40052426497461247, "compression/movement_sparsity/importance_threshold": -0.004198569078897242, "compression/movement_sparsity/linear_layer_sparsity": 0.2945333915805074, "compression/movement_sparsity/model_sparsity": 0.28441526348902246, "compression_loss": 43.0142707824707, "distillation_loss": 0.8823603391647339, "epoch": 2.47, "learning_rate": 4.1823987977834136e-05, "loss": 43.784, "step": 2924, "task_loss": 1.5097572803497314 }, { "compression/movement_sparsity/importance_regularization_factor": 0.4011245410331694, "compression/movement_sparsity/importance_threshold": -0.0041943649045645594, "compression/movement_sparsity/linear_layer_sparsity": 0.2954618549693062, "compression/movement_sparsity/model_sparsity": 0.2853118313041286, "compression_loss": 43.07833480834961, "distillation_loss": 0.7987208366394043, "epoch": 2.47, "learning_rate": 4.181929181929182e-05, "loss": 43.8147, "step": 2925, "task_loss": 0.3470713496208191 }, { "compression/movement_sparsity/importance_regularization_factor": 0.4017244162396153, "compression/movement_sparsity/importance_threshold": -0.004190163537693765, "compression/movement_sparsity/linear_layer_sparsity": 0.2963320568750363, "compression/movement_sparsity/model_sparsity": 0.2861521390973426, "compression_loss": 43.14236068725586, "distillation_loss": 0.9048537015914917, "epoch": 2.47, "learning_rate": 4.181459566074951e-05, "loss": 43.8766, "step": 2926, "task_loss": 1.1825629472732544 }, { "compression/movement_sparsity/importance_regularization_factor": 0.40232389072783514, "compression/movement_sparsity/importance_threshold": -0.004185964977347164, "compression/movement_sparsity/linear_layer_sparsity": 0.2972232810883083, "compression/movement_sparsity/model_sparsity": 0.2870127470171624, "compression_loss": 43.206329345703125, "distillation_loss": 1.0933799743652344, "epoch": 2.47, "learning_rate": 4.1809899502207195e-05, "loss": 43.9441, "step": 2927, "task_loss": 1.2188332080841064 }, { "compression/movement_sparsity/importance_regularization_factor": 0.4029229646317144, "compression/movement_sparsity/importance_threshold": -0.0041817692225870565, "compression/movement_sparsity/linear_layer_sparsity": 0.29815454665650154, "compression/movement_sparsity/model_sparsity": 0.28791202074818023, "compression_loss": 43.27021408081055, "distillation_loss": 0.7390118837356567, "epoch": 2.47, "learning_rate": 4.180520334366488e-05, "loss": 43.9092, "step": 2928, "task_loss": 0.6102986335754395 }, { "compression/movement_sparsity/importance_regularization_factor": 0.4035216380851381, "compression/movement_sparsity/importance_threshold": -0.004177576272475747, "compression/movement_sparsity/linear_layer_sparsity": 0.2990434575812522, "compression/movement_sparsity/model_sparsity": 0.28877039484805583, "compression_loss": 43.33408737182617, "distillation_loss": 0.5542397499084473, "epoch": 2.48, "learning_rate": 4.1800507185122574e-05, "loss": 44.2212, "step": 2929, "task_loss": 0.7207878232002258 }, { "compression/movement_sparsity/importance_regularization_factor": 0.4041199112219914, "compression/movement_sparsity/importance_threshold": -0.004173386126075542, "compression/movement_sparsity/linear_layer_sparsity": 0.2998072124424969, "compression/movement_sparsity/model_sparsity": 0.289507912380232, "compression_loss": 43.39794158935547, "distillation_loss": 0.3249339163303375, "epoch": 2.48, "learning_rate": 4.179581102658026e-05, "loss": 44.0025, "step": 2930, "task_loss": 0.80772864818573 }, { "compression/movement_sparsity/importance_regularization_factor": 0.40471778417615933, "compression/movement_sparsity/importance_threshold": -0.0041691987824487445, "compression/movement_sparsity/linear_layer_sparsity": 0.3005536891848372, "compression/movement_sparsity/model_sparsity": 0.29022874535004184, "compression_loss": 43.46168518066406, "distillation_loss": 0.5256613492965698, "epoch": 2.48, "learning_rate": 4.179111486803795e-05, "loss": 44.3212, "step": 2931, "task_loss": 0.5185351371765137 }, { "compression/movement_sparsity/importance_regularization_factor": 0.40531525708152716, "compression/movement_sparsity/importance_threshold": -0.004165014240657658, "compression/movement_sparsity/linear_layer_sparsity": 0.3014118953779255, "compression/movement_sparsity/model_sparsity": 0.2910574695202465, "compression_loss": 43.52542495727539, "distillation_loss": 0.839435338973999, "epoch": 2.48, "learning_rate": 4.178641870949563e-05, "loss": 44.4701, "step": 2932, "task_loss": 1.6290571689605713 }, { "compression/movement_sparsity/importance_regularization_factor": 0.4059123300719799, "compression/movement_sparsity/importance_threshold": -0.004160832499764586, "compression/movement_sparsity/linear_layer_sparsity": 0.3021416782855756, "compression/movement_sparsity/model_sparsity": 0.291762182139944, "compression_loss": 43.58913040161133, "distillation_loss": 0.5258724689483643, "epoch": 2.48, "learning_rate": 4.178172255095332e-05, "loss": 44.2958, "step": 2933, "task_loss": 0.663270890712738 }, { "compression/movement_sparsity/importance_regularization_factor": 0.4065090032814027, "compression/movement_sparsity/importance_threshold": -0.004156653558831835, "compression/movement_sparsity/linear_layer_sparsity": 0.30288673605196725, "compression/movement_sparsity/model_sparsity": 0.2924816448799943, "compression_loss": 43.652732849121094, "distillation_loss": 0.6036491394042969, "epoch": 2.48, "learning_rate": 4.177702639241101e-05, "loss": 44.465, "step": 2934, "task_loss": 0.5574185848236084 }, { "compression/movement_sparsity/importance_regularization_factor": 0.4071052768436808, "compression/movement_sparsity/importance_threshold": -0.004152477416921705, "compression/movement_sparsity/linear_layer_sparsity": 0.30364587626033684, "compression/movement_sparsity/model_sparsity": 0.29321470628681795, "compression_loss": 43.71631622314453, "distillation_loss": 0.33077341318130493, "epoch": 2.48, "learning_rate": 4.177233023386869e-05, "loss": 44.3507, "step": 2935, "task_loss": 0.03658715635538101 }, { "compression/movement_sparsity/importance_regularization_factor": 0.4077011508926993, "compression/movement_sparsity/importance_threshold": -0.004148304073096505, "compression/movement_sparsity/linear_layer_sparsity": 0.30454377800748494, "compression/movement_sparsity/model_sparsity": 0.29408176234668265, "compression_loss": 43.7798957824707, "distillation_loss": 0.5207771062850952, "epoch": 2.48, "learning_rate": 4.1767634075326385e-05, "loss": 44.4831, "step": 2936, "task_loss": 0.5050748586654663 }, { "compression/movement_sparsity/importance_regularization_factor": 0.40829662556234336, "compression/movement_sparsity/importance_threshold": -0.004144133526418533, "compression/movement_sparsity/linear_layer_sparsity": 0.30534378233834264, "compression/movement_sparsity/model_sparsity": 0.2948542840676742, "compression_loss": 43.84335708618164, "distillation_loss": 0.8236490488052368, "epoch": 2.48, "learning_rate": 4.176293791678407e-05, "loss": 44.8155, "step": 2937, "task_loss": 0.5536699891090393 }, { "compression/movement_sparsity/importance_regularization_factor": 0.40889170098649785, "compression/movement_sparsity/importance_threshold": -0.0041399657759501, "compression/movement_sparsity/linear_layer_sparsity": 0.30613796767539403, "compression/movement_sparsity/model_sparsity": 0.295621186695198, "compression_loss": 43.90678405761719, "distillation_loss": 0.6964631080627441, "epoch": 2.48, "learning_rate": 4.1758241758241765e-05, "loss": 44.7836, "step": 2938, "task_loss": 0.21174968779087067 }, { "compression/movement_sparsity/importance_regularization_factor": 0.40948637729904847, "compression/movement_sparsity/importance_threshold": -0.004135800820753504, "compression/movement_sparsity/linear_layer_sparsity": 0.30702517344417274, "compression/movement_sparsity/model_sparsity": 0.29647791421645503, "compression_loss": 43.97017288208008, "distillation_loss": 1.527217149734497, "epoch": 2.48, "learning_rate": 4.1753545599699444e-05, "loss": 45.2043, "step": 2939, "task_loss": 1.802750825881958 }, { "compression/movement_sparsity/importance_regularization_factor": 0.4100806546338799, "compression/movement_sparsity/importance_threshold": -0.0041316386598910515, "compression/movement_sparsity/linear_layer_sparsity": 0.3079654298347635, "compression/movement_sparsity/model_sparsity": 0.29738586990746196, "compression_loss": 44.033512115478516, "distillation_loss": 0.7779862880706787, "epoch": 2.48, "learning_rate": 4.174884944115714e-05, "loss": 44.942, "step": 2940, "task_loss": 0.6638805270195007 }, { "compression/movement_sparsity/importance_regularization_factor": 0.41067453312487745, "compression/movement_sparsity/importance_threshold": -0.004127479292425045, "compression/movement_sparsity/linear_layer_sparsity": 0.3086950577282343, "compression/movement_sparsity/model_sparsity": 0.2980904328381941, "compression_loss": 44.09684371948242, "distillation_loss": 0.5371556282043457, "epoch": 2.49, "learning_rate": 4.1744153282614824e-05, "loss": 44.7625, "step": 2941, "task_loss": 1.0014811754226685 }, { "compression/movement_sparsity/importance_regularization_factor": 0.4112680129059263, "compression/movement_sparsity/importance_threshold": -0.0041233227174177906, "compression/movement_sparsity/linear_layer_sparsity": 0.30942293276906263, "compression/movement_sparsity/model_sparsity": 0.29879330313216446, "compression_loss": 44.16010284423828, "distillation_loss": 0.9129904508590698, "epoch": 2.49, "learning_rate": 4.173945712407251e-05, "loss": 44.8287, "step": 2942, "task_loss": 0.8579537272453308 }, { "compression/movement_sparsity/importance_regularization_factor": 0.41186109411091154, "compression/movement_sparsity/importance_threshold": -0.0041191689339315895, "compression/movement_sparsity/linear_layer_sparsity": 0.3103906266693835, "compression/movement_sparsity/model_sparsity": 0.29972775377003463, "compression_loss": 44.22334289550781, "distillation_loss": 1.5290093421936035, "epoch": 2.49, "learning_rate": 4.17347609655302e-05, "loss": 45.246, "step": 2943, "task_loss": 1.0454132556915283 }, { "compression/movement_sparsity/importance_regularization_factor": 0.4124537768737182, "compression/movement_sparsity/importance_threshold": -0.004115017941028749, "compression/movement_sparsity/linear_layer_sparsity": 0.3111751772789851, "compression/movement_sparsity/model_sparsity": 0.30048535265263643, "compression_loss": 44.2865104675293, "distillation_loss": 0.5292792320251465, "epoch": 2.49, "learning_rate": 4.173006480698788e-05, "loss": 44.9714, "step": 2944, "task_loss": 0.6021685004234314 }, { "compression/movement_sparsity/importance_regularization_factor": 0.41304606132823163, "compression/movement_sparsity/importance_threshold": -0.004110869737771571, "compression/movement_sparsity/linear_layer_sparsity": 0.3120837630803349, "compression/movement_sparsity/model_sparsity": 0.3013627257365731, "compression_loss": 44.34966278076172, "distillation_loss": 1.10421621799469, "epoch": 2.49, "learning_rate": 4.1725368648445576e-05, "loss": 45.1226, "step": 2945, "task_loss": 0.9707024097442627 }, { "compression/movement_sparsity/importance_regularization_factor": 0.4136379476083367, "compression/movement_sparsity/importance_threshold": -0.004106724323222361, "compression/movement_sparsity/linear_layer_sparsity": 0.31276450188649874, "compression/movement_sparsity/model_sparsity": 0.30202007907054756, "compression_loss": 44.412776947021484, "distillation_loss": 0.32868367433547974, "epoch": 2.49, "learning_rate": 4.172067248990326e-05, "loss": 45.065, "step": 2946, "task_loss": 0.6558057069778442 }, { "compression/movement_sparsity/importance_regularization_factor": 0.4142294358479186, "compression/movement_sparsity/importance_threshold": -0.004102581696443423, "compression/movement_sparsity/linear_layer_sparsity": 0.31346224455571126, "compression/movement_sparsity/model_sparsity": 0.30269385213256517, "compression_loss": 44.475826263427734, "distillation_loss": 0.8931636214256287, "epoch": 2.49, "learning_rate": 4.171597633136095e-05, "loss": 45.4207, "step": 2947, "task_loss": 1.1884026527404785 }, { "compression/movement_sparsity/importance_regularization_factor": 0.41482052618086307, "compression/movement_sparsity/importance_threshold": -0.004098441856497056, "compression/movement_sparsity/linear_layer_sparsity": 0.3142285273404947, "compression/movement_sparsity/model_sparsity": 0.30343381074632975, "compression_loss": 44.53882598876953, "distillation_loss": 0.6348766088485718, "epoch": 2.49, "learning_rate": 4.1711280172818635e-05, "loss": 45.4906, "step": 2948, "task_loss": 0.3994178771972656 }, { "compression/movement_sparsity/importance_regularization_factor": 0.41541121874105424, "compression/movement_sparsity/importance_threshold": -0.004094304802445572, "compression/movement_sparsity/linear_layer_sparsity": 0.3150193500622728, "compression/movement_sparsity/model_sparsity": 0.30419746627475946, "compression_loss": 44.60183334350586, "distillation_loss": 0.6538788080215454, "epoch": 2.49, "learning_rate": 4.170658401427632e-05, "loss": 45.5572, "step": 2949, "task_loss": 0.8999935984611511 }, { "compression/movement_sparsity/importance_regularization_factor": 0.41600151366237803, "compression/movement_sparsity/importance_threshold": -0.004090170533351268, "compression/movement_sparsity/linear_layer_sparsity": 0.31585925265804005, "compression/movement_sparsity/model_sparsity": 0.3050085156325195, "compression_loss": 44.664798736572266, "distillation_loss": 1.3343597650527954, "epoch": 2.49, "learning_rate": 4.1701887855734014e-05, "loss": 45.5255, "step": 2950, "task_loss": 1.1742417812347412 }, { "compression/movement_sparsity/importance_regularization_factor": 0.41659141107871933, "compression/movement_sparsity/importance_threshold": -0.004086039048276452, "compression/movement_sparsity/linear_layer_sparsity": 0.31663964173313675, "compression/movement_sparsity/model_sparsity": 0.30576209594212905, "compression_loss": 44.727718353271484, "distillation_loss": 1.0035552978515625, "epoch": 2.49, "learning_rate": 4.16971916971917e-05, "loss": 45.6523, "step": 2951, "task_loss": 1.1468698978424072 }, { "compression/movement_sparsity/importance_regularization_factor": 0.41718091112396327, "compression/movement_sparsity/importance_threshold": -0.004081910346283427, "compression/movement_sparsity/linear_layer_sparsity": 0.31743984877484427, "compression/movement_sparsity/model_sparsity": 0.3065348134102291, "compression_loss": 44.790584564208984, "distillation_loss": 1.2424607276916504, "epoch": 2.5, "learning_rate": 4.169249553864939e-05, "loss": 45.6839, "step": 2952, "task_loss": 0.9031699895858765 }, { "compression/movement_sparsity/importance_regularization_factor": 0.417770013931995, "compression/movement_sparsity/importance_threshold": -0.004077784426434496, "compression/movement_sparsity/linear_layer_sparsity": 0.318247448800486, "compression/movement_sparsity/model_sparsity": 0.30731466989052175, "compression_loss": 44.85344696044922, "distillation_loss": 0.9061406850814819, "epoch": 2.5, "learning_rate": 4.168779938010707e-05, "loss": 45.7166, "step": 2953, "task_loss": 0.27855953574180603 }, { "compression/movement_sparsity/importance_regularization_factor": 0.41835871963669957, "compression/movement_sparsity/importance_threshold": -0.004073661287791965, "compression/movement_sparsity/linear_layer_sparsity": 0.3190533555943234, "compression/movement_sparsity/model_sparsity": 0.30809289130673156, "compression_loss": 44.916255950927734, "distillation_loss": 0.6621519327163696, "epoch": 2.5, "learning_rate": 4.168310322156476e-05, "loss": 45.7283, "step": 2954, "task_loss": 0.290755033493042 }, { "compression/movement_sparsity/importance_regularization_factor": 0.4189470283719622, "compression/movement_sparsity/importance_threshold": -0.004069540929418136, "compression/movement_sparsity/linear_layer_sparsity": 0.3198381089147748, "compression/movement_sparsity/model_sparsity": 0.3088506859364419, "compression_loss": 44.97902297973633, "distillation_loss": 0.8518170714378357, "epoch": 2.5, "learning_rate": 4.167840706302245e-05, "loss": 45.7434, "step": 2955, "task_loss": 2.3175394535064697 }, { "compression/movement_sparsity/importance_regularization_factor": 0.41953494027166816, "compression/movement_sparsity/importance_threshold": -0.004065423350375314, "compression/movement_sparsity/linear_layer_sparsity": 0.3206730272084703, "compression/movement_sparsity/model_sparsity": 0.30965692221823987, "compression_loss": 45.04173278808594, "distillation_loss": 1.1798666715621948, "epoch": 2.5, "learning_rate": 4.167371090448014e-05, "loss": 46.1728, "step": 2956, "task_loss": 0.5762167572975159 }, { "compression/movement_sparsity/importance_regularization_factor": 0.4201224554697024, "compression/movement_sparsity/importance_threshold": -0.004061308549725803, "compression/movement_sparsity/linear_layer_sparsity": 0.3216044954875134, "compression/movement_sparsity/model_sparsity": 0.31055639169636623, "compression_loss": 45.10441970825195, "distillation_loss": 1.5197710990905762, "epoch": 2.5, "learning_rate": 4.1669014745937825e-05, "loss": 46.2245, "step": 2957, "task_loss": 1.622458577156067 }, { "compression/movement_sparsity/importance_regularization_factor": 0.4207095740999499, "compression/movement_sparsity/importance_threshold": -0.004057196526531907, "compression/movement_sparsity/linear_layer_sparsity": 0.32241366950328304, "compression/movement_sparsity/model_sparsity": 0.3113377680953837, "compression_loss": 45.16708755493164, "distillation_loss": 0.8017421364784241, "epoch": 2.5, "learning_rate": 4.166431858739551e-05, "loss": 45.9341, "step": 2958, "task_loss": 0.7219691276550293 }, { "compression/movement_sparsity/importance_regularization_factor": 0.42129629629629617, "compression/movement_sparsity/importance_threshold": -0.0040530872798559305, "compression/movement_sparsity/linear_layer_sparsity": 0.32323511348755, "compression/movement_sparsity/model_sparsity": 0.31213099295173385, "compression_loss": 45.229671478271484, "distillation_loss": 0.8418365120887756, "epoch": 2.5, "learning_rate": 4.16596224288532e-05, "loss": 46.0451, "step": 2959, "task_loss": 0.9927123188972473 }, { "compression/movement_sparsity/importance_regularization_factor": 0.4218826221926263, "compression/movement_sparsity/importance_threshold": -0.004048980808760175, "compression/movement_sparsity/linear_layer_sparsity": 0.3240397443954504, "compression/movement_sparsity/model_sparsity": 0.3129079823126137, "compression_loss": 45.29218292236328, "distillation_loss": 0.5655040144920349, "epoch": 2.5, "learning_rate": 4.165492627031089e-05, "loss": 46.3866, "step": 2960, "task_loss": 0.5054582953453064 }, { "compression/movement_sparsity/importance_regularization_factor": 0.4224685519228254, "compression/movement_sparsity/importance_threshold": -0.0040448771123069455, "compression/movement_sparsity/linear_layer_sparsity": 0.3248579330819528, "compression/movement_sparsity/model_sparsity": 0.3136980637006919, "compression_loss": 45.354671478271484, "distillation_loss": 1.1326653957366943, "epoch": 2.5, "learning_rate": 4.165023011176857e-05, "loss": 46.2157, "step": 2961, "task_loss": 1.6219327449798584 }, { "compression/movement_sparsity/importance_regularization_factor": 0.4230540856207784, "compression/movement_sparsity/importance_threshold": -0.004040776189558549, "compression/movement_sparsity/linear_layer_sparsity": 0.32563642621439537, "compression/movement_sparsity/model_sparsity": 0.3144498131991101, "compression_loss": 45.4171257019043, "distillation_loss": 0.9375326633453369, "epoch": 2.5, "learning_rate": 4.1645533953226264e-05, "loss": 46.2661, "step": 2962, "task_loss": 1.1186583042144775 }, { "compression/movement_sparsity/importance_regularization_factor": 0.42363922342037075, "compression/movement_sparsity/importance_threshold": -0.004036678039577284, "compression/movement_sparsity/linear_layer_sparsity": 0.32643619206190033, "compression/movement_sparsity/model_sparsity": 0.3152221046293857, "compression_loss": 45.479515075683594, "distillation_loss": 0.5558945536613464, "epoch": 2.5, "learning_rate": 4.164083779468395e-05, "loss": 46.2633, "step": 2963, "task_loss": 0.5486981272697449 }, { "compression/movement_sparsity/importance_regularization_factor": 0.42422396545548735, "compression/movement_sparsity/importance_threshold": -0.004032582661425459, "compression/movement_sparsity/linear_layer_sparsity": 0.32721348085341173, "compression/movement_sparsity/model_sparsity": 0.3159726911596887, "compression_loss": 45.5418586730957, "distillation_loss": 1.4929382801055908, "epoch": 2.51, "learning_rate": 4.1636141636141643e-05, "loss": 46.6576, "step": 2964, "task_loss": 0.8191894888877869 }, { "compression/movement_sparsity/importance_regularization_factor": 0.42480831186001333, "compression/movement_sparsity/importance_threshold": -0.004028490054165378, "compression/movement_sparsity/linear_layer_sparsity": 0.32807930658961937, "compression/movement_sparsity/model_sparsity": 0.31680877311826605, "compression_loss": 45.604190826416016, "distillation_loss": 0.7607845664024353, "epoch": 2.51, "learning_rate": 4.163144547759932e-05, "loss": 46.6781, "step": 2965, "task_loss": 0.5233112573623657 }, { "compression/movement_sparsity/importance_regularization_factor": 0.42539226276783404, "compression/movement_sparsity/importance_threshold": -0.004024400216859342, "compression/movement_sparsity/linear_layer_sparsity": 0.32893274311565335, "compression/movement_sparsity/model_sparsity": 0.3176328914741529, "compression_loss": 45.6664924621582, "distillation_loss": 0.8617205023765564, "epoch": 2.51, "learning_rate": 4.162674931905701e-05, "loss": 46.4778, "step": 2966, "task_loss": 0.9161517024040222 }, { "compression/movement_sparsity/importance_regularization_factor": 0.4259758183128345, "compression/movement_sparsity/importance_threshold": -0.004020313148569658, "compression/movement_sparsity/linear_layer_sparsity": 0.32969610447936604, "compression/movement_sparsity/model_sparsity": 0.31837002902664785, "compression_loss": 45.72873306274414, "distillation_loss": 0.5909499526023865, "epoch": 2.51, "learning_rate": 4.16220531605147e-05, "loss": 46.8295, "step": 2967, "task_loss": 0.954791784286499 }, { "compression/movement_sparsity/importance_regularization_factor": 0.4265589786288996, "compression/movement_sparsity/importance_threshold": -0.004016228848358628, "compression/movement_sparsity/linear_layer_sparsity": 0.3304533964417521, "compression/movement_sparsity/model_sparsity": 0.31910130568042333, "compression_loss": 45.79096221923828, "distillation_loss": 0.7730787992477417, "epoch": 2.51, "learning_rate": 4.161735700197239e-05, "loss": 46.854, "step": 2968, "task_loss": 1.202275037765503 }, { "compression/movement_sparsity/importance_regularization_factor": 0.427141743849915, "compression/movement_sparsity/importance_threshold": -0.004012147315288556, "compression/movement_sparsity/linear_layer_sparsity": 0.3312154222986895, "compression/movement_sparsity/model_sparsity": 0.3198371536049093, "compression_loss": 45.85311508178711, "distillation_loss": 0.4474985897541046, "epoch": 2.51, "learning_rate": 4.1612660843430075e-05, "loss": 46.703, "step": 2969, "task_loss": 0.27049994468688965 }, { "compression/movement_sparsity/importance_regularization_factor": 0.4277241141097655, "compression/movement_sparsity/importance_threshold": -0.004008068548421747, "compression/movement_sparsity/linear_layer_sparsity": 0.33205509833527175, "compression/movement_sparsity/model_sparsity": 0.3206479841864892, "compression_loss": 45.915199279785156, "distillation_loss": 1.236438274383545, "epoch": 2.51, "learning_rate": 4.160796468488776e-05, "loss": 46.9184, "step": 2970, "task_loss": 1.1832753419876099 }, { "compression/movement_sparsity/importance_regularization_factor": 0.4283060895423363, "compression/movement_sparsity/importance_threshold": -0.004003992546820505, "compression/movement_sparsity/linear_layer_sparsity": 0.3327313178819015, "compression/movement_sparsity/model_sparsity": 0.32130097351139764, "compression_loss": 45.97730255126953, "distillation_loss": 0.5596473217010498, "epoch": 2.51, "learning_rate": 4.1603268526345455e-05, "loss": 46.8051, "step": 2971, "task_loss": 0.4584386944770813 }, { "compression/movement_sparsity/importance_regularization_factor": 0.42888767028151265, "compression/movement_sparsity/importance_threshold": -0.003999919309547132, "compression/movement_sparsity/linear_layer_sparsity": 0.3336013528492846, "compression/movement_sparsity/model_sparsity": 0.3221411201011105, "compression_loss": 46.03932571411133, "distillation_loss": 1.1359453201293945, "epoch": 2.51, "learning_rate": 4.159857236780314e-05, "loss": 47.2822, "step": 2972, "task_loss": 0.8737763166427612 }, { "compression/movement_sparsity/importance_regularization_factor": 0.4294688564611795, "compression/movement_sparsity/importance_threshold": -0.003995848835663934, "compression/movement_sparsity/linear_layer_sparsity": 0.33431665981742476, "compression/movement_sparsity/model_sparsity": 0.32283185407435344, "compression_loss": 46.101314544677734, "distillation_loss": 0.8587710857391357, "epoch": 2.51, "learning_rate": 4.159387620926083e-05, "loss": 47.157, "step": 2973, "task_loss": 0.6843359470367432 }, { "compression/movement_sparsity/importance_regularization_factor": 0.4300496482152222, "compression/movement_sparsity/importance_threshold": -0.003991781124233214, "compression/movement_sparsity/linear_layer_sparsity": 0.3350952841157114, "compression/movement_sparsity/model_sparsity": 0.32358373023266535, "compression_loss": 46.163265228271484, "distillation_loss": 1.0358749628067017, "epoch": 2.51, "learning_rate": 4.1589180050718514e-05, "loss": 47.0823, "step": 2974, "task_loss": 0.6716317534446716 }, { "compression/movement_sparsity/importance_regularization_factor": 0.4306300456775256, "compression/movement_sparsity/importance_threshold": -0.003987716174317276, "compression/movement_sparsity/linear_layer_sparsity": 0.3358344632674585, "compression/movement_sparsity/model_sparsity": 0.32429751630656894, "compression_loss": 46.22522735595703, "distillation_loss": 0.8303221464157104, "epoch": 2.51, "learning_rate": 4.15844838921762e-05, "loss": 47.1971, "step": 2975, "task_loss": 1.596840262413025 }, { "compression/movement_sparsity/importance_regularization_factor": 0.4312100489819751, "compression/movement_sparsity/importance_threshold": -0.003983653984978425, "compression/movement_sparsity/linear_layer_sparsity": 0.33653064387071074, "compression/movement_sparsity/model_sparsity": 0.32496978096439744, "compression_loss": 46.28711700439453, "distillation_loss": 1.0439051389694214, "epoch": 2.52, "learning_rate": 4.157978773363389e-05, "loss": 47.2883, "step": 2976, "task_loss": 0.7901548743247986 }, { "compression/movement_sparsity/importance_regularization_factor": 0.43178965826245586, "compression/movement_sparsity/importance_threshold": -0.003979594555278965, "compression/movement_sparsity/linear_layer_sparsity": 0.3372892832640396, "compression/movement_sparsity/model_sparsity": 0.3257023587607177, "compression_loss": 46.348941802978516, "distillation_loss": 1.0318669080734253, "epoch": 2.52, "learning_rate": 4.157509157509158e-05, "loss": 47.4219, "step": 2977, "task_loss": 0.3478185832500458 }, { "compression/movement_sparsity/importance_regularization_factor": 0.4323688736528526, "compression/movement_sparsity/importance_threshold": -0.0039755378842812, "compression/movement_sparsity/linear_layer_sparsity": 0.3379916286619596, "compression/movement_sparsity/model_sparsity": 0.32638057643355195, "compression_loss": 46.41075897216797, "distillation_loss": 2.2380435466766357, "epoch": 2.52, "learning_rate": 4.1570395416549266e-05, "loss": 47.5066, "step": 2978, "task_loss": 1.533159852027893 }, { "compression/movement_sparsity/importance_regularization_factor": 0.432947695287051, "compression/movement_sparsity/importance_threshold": -0.003971483971047432, "compression/movement_sparsity/linear_layer_sparsity": 0.33872290209056416, "compression/movement_sparsity/model_sparsity": 0.32708672837022373, "compression_loss": 46.472537994384766, "distillation_loss": 1.2040711641311646, "epoch": 2.52, "learning_rate": 4.156569925800695e-05, "loss": 47.4991, "step": 2979, "task_loss": 0.7526586055755615 }, { "compression/movement_sparsity/importance_regularization_factor": 0.43352612329893603, "compression/movement_sparsity/importance_threshold": -0.0039674328146399665, "compression/movement_sparsity/linear_layer_sparsity": 0.3395400772228175, "compression/movement_sparsity/model_sparsity": 0.3278758310227594, "compression_loss": 46.53425598144531, "distillation_loss": 1.2190628051757812, "epoch": 2.52, "learning_rate": 4.156100309946464e-05, "loss": 47.353, "step": 2980, "task_loss": 0.8773961663246155 }, { "compression/movement_sparsity/importance_regularization_factor": 0.43410415782239276, "compression/movement_sparsity/importance_threshold": -0.003963384414121107, "compression/movement_sparsity/linear_layer_sparsity": 0.3403221118330479, "compression/movement_sparsity/model_sparsity": 0.32863100033830855, "compression_loss": 46.59595489501953, "distillation_loss": 0.99399733543396, "epoch": 2.52, "learning_rate": 4.155630694092233e-05, "loss": 47.7408, "step": 2981, "task_loss": 0.7053678631782532 }, { "compression/movement_sparsity/importance_regularization_factor": 0.4346817989913063, "compression/movement_sparsity/importance_threshold": -0.0039593387685531575, "compression/movement_sparsity/linear_layer_sparsity": 0.34111178983473295, "compression/movement_sparsity/model_sparsity": 0.32939355047130203, "compression_loss": 46.6575927734375, "distillation_loss": 0.9112321138381958, "epoch": 2.52, "learning_rate": 4.155161078238001e-05, "loss": 47.7786, "step": 2982, "task_loss": 0.38202929496765137 }, { "compression/movement_sparsity/importance_regularization_factor": 0.43525904693956197, "compression/movement_sparsity/importance_threshold": -0.003955295876998422, "compression/movement_sparsity/linear_layer_sparsity": 0.3418831403907617, "compression/movement_sparsity/model_sparsity": 0.3301384027627793, "compression_loss": 46.71919631958008, "distillation_loss": 0.5843592882156372, "epoch": 2.52, "learning_rate": 4.1546914623837704e-05, "loss": 47.5869, "step": 2983, "task_loss": 1.0483797788619995 }, { "compression/movement_sparsity/importance_regularization_factor": 0.4358359018010447, "compression/movement_sparsity/importance_threshold": -0.003951255738519205, "compression/movement_sparsity/linear_layer_sparsity": 0.34270929442124476, "compression/movement_sparsity/model_sparsity": 0.33093617586076823, "compression_loss": 46.78078079223633, "distillation_loss": 1.1728811264038086, "epoch": 2.52, "learning_rate": 4.154221846529539e-05, "loss": 48.0374, "step": 2984, "task_loss": 0.6150769591331482 }, { "compression/movement_sparsity/importance_regularization_factor": 0.43641236370963965, "compression/movement_sparsity/importance_threshold": -0.00394721835217781, "compression/movement_sparsity/linear_layer_sparsity": 0.3434724530741077, "compression/movement_sparsity/model_sparsity": 0.33167311766615465, "compression_loss": 46.84231185913086, "distillation_loss": 0.9625322818756104, "epoch": 2.52, "learning_rate": 4.153752230675308e-05, "loss": 47.7726, "step": 2985, "task_loss": 0.5456115007400513 }, { "compression/movement_sparsity/importance_regularization_factor": 0.43698843279923205, "compression/movement_sparsity/importance_threshold": -0.003943183717036542, "compression/movement_sparsity/linear_layer_sparsity": 0.3442326068367263, "compression/movement_sparsity/model_sparsity": 0.33240715780852087, "compression_loss": 46.90380096435547, "distillation_loss": 0.8806748390197754, "epoch": 2.52, "learning_rate": 4.153282614821077e-05, "loss": 47.7267, "step": 2986, "task_loss": 1.218402624130249 }, { "compression/movement_sparsity/importance_regularization_factor": 0.43756410920370714, "compression/movement_sparsity/importance_threshold": -0.003939151832157703, "compression/movement_sparsity/linear_layer_sparsity": 0.34499141316840204, "compression/movement_sparsity/model_sparsity": 0.3331398968083422, "compression_loss": 46.965213775634766, "distillation_loss": 0.9695619344711304, "epoch": 2.52, "learning_rate": 4.152812998966845e-05, "loss": 48.1035, "step": 2987, "task_loss": 0.7791354656219482 }, { "compression/movement_sparsity/importance_regularization_factor": 0.4381393930569496, "compression/movement_sparsity/importance_threshold": -0.0039351226966036, "compression/movement_sparsity/linear_layer_sparsity": 0.3458051064437057, "compression/movement_sparsity/model_sparsity": 0.33392563721642593, "compression_loss": 47.02659225463867, "distillation_loss": 1.5583608150482178, "epoch": 2.53, "learning_rate": 4.152343383112614e-05, "loss": 48.2211, "step": 2988, "task_loss": 1.8578460216522217 }, { "compression/movement_sparsity/importance_regularization_factor": 0.4387142844928451, "compression/movement_sparsity/importance_threshold": -0.003931096309436534, "compression/movement_sparsity/linear_layer_sparsity": 0.3465370237773626, "compression/movement_sparsity/model_sparsity": 0.33463241093803064, "compression_loss": 47.08794021606445, "distillation_loss": 1.1930773258209229, "epoch": 2.53, "learning_rate": 4.151873767258383e-05, "loss": 48.296, "step": 2989, "task_loss": 0.5154758095741272 }, { "compression/movement_sparsity/importance_regularization_factor": 0.4392887836452787, "compression/movement_sparsity/importance_threshold": -0.00392707266971881, "compression/movement_sparsity/linear_layer_sparsity": 0.34717675537102666, "compression/movement_sparsity/model_sparsity": 0.3352501657834077, "compression_loss": 47.14923095703125, "distillation_loss": 0.7614381909370422, "epoch": 2.53, "learning_rate": 4.1514041514041515e-05, "loss": 48.1385, "step": 2990, "task_loss": 0.5609140992164612 }, { "compression/movement_sparsity/importance_regularization_factor": 0.43986289064813533, "compression/movement_sparsity/importance_threshold": -0.003923051776512731, "compression/movement_sparsity/linear_layer_sparsity": 0.34801950784285896, "compression/movement_sparsity/model_sparsity": 0.3360639671152227, "compression_loss": 47.21049880981445, "distillation_loss": 0.7212532758712769, "epoch": 2.53, "learning_rate": 4.15093453554992e-05, "loss": 47.9919, "step": 2991, "task_loss": 0.6041557788848877 }, { "compression/movement_sparsity/importance_regularization_factor": 0.4404366056353003, "compression/movement_sparsity/importance_threshold": -0.003919033628880603, "compression/movement_sparsity/linear_layer_sparsity": 0.3488089354370236, "compression/movement_sparsity/model_sparsity": 0.3368262754429645, "compression_loss": 47.27173614501953, "distillation_loss": 0.9841560125350952, "epoch": 2.53, "learning_rate": 4.150464919695689e-05, "loss": 48.3682, "step": 2992, "task_loss": 1.401531457901001 }, { "compression/movement_sparsity/importance_regularization_factor": 0.4410099287406586, "compression/movement_sparsity/importance_threshold": -0.003915018225884729, "compression/movement_sparsity/linear_layer_sparsity": 0.34959409417917464, "compression/movement_sparsity/model_sparsity": 0.3375844615668918, "compression_loss": 47.33286666870117, "distillation_loss": 1.1893310546875, "epoch": 2.53, "learning_rate": 4.149995303841458e-05, "loss": 48.6276, "step": 2993, "task_loss": 0.9419896006584167 }, { "compression/movement_sparsity/importance_regularization_factor": 0.44158286009809544, "compression/movement_sparsity/importance_threshold": -0.003911005566587413, "compression/movement_sparsity/linear_layer_sparsity": 0.3502905132657796, "compression/movement_sparsity/model_sparsity": 0.3382569565154362, "compression_loss": 47.39405059814453, "distillation_loss": 1.1872023344039917, "epoch": 2.53, "learning_rate": 4.149525687987227e-05, "loss": 48.2131, "step": 2994, "task_loss": 1.1679962873458862 }, { "compression/movement_sparsity/importance_regularization_factor": 0.442155399841496, "compression/movement_sparsity/importance_threshold": -0.003906995650050959, "compression/movement_sparsity/linear_layer_sparsity": 0.3509968413356899, "compression/movement_sparsity/model_sparsity": 0.3389390200432258, "compression_loss": 47.455116271972656, "distillation_loss": 1.6993533372879028, "epoch": 2.53, "learning_rate": 4.1490560721329954e-05, "loss": 48.4907, "step": 2995, "task_loss": 1.8787879943847656 }, { "compression/movement_sparsity/importance_regularization_factor": 0.44272754810474546, "compression/movement_sparsity/importance_threshold": -0.00390298847533767, "compression/movement_sparsity/linear_layer_sparsity": 0.3516989959469277, "compression/movement_sparsity/model_sparsity": 0.3396170534834873, "compression_loss": 47.516204833984375, "distillation_loss": 0.6081333160400391, "epoch": 2.53, "learning_rate": 4.148586456278764e-05, "loss": 48.4946, "step": 2996, "task_loss": 0.8442394733428955 }, { "compression/movement_sparsity/importance_regularization_factor": 0.4432993050217289, "compression/movement_sparsity/importance_threshold": -0.0038989840415098514, "compression/movement_sparsity/linear_layer_sparsity": 0.3523986822554649, "compression/movement_sparsity/model_sparsity": 0.3402927034148394, "compression_loss": 47.577247619628906, "distillation_loss": 1.686255693435669, "epoch": 2.53, "learning_rate": 4.1481168404245327e-05, "loss": 48.5204, "step": 2997, "task_loss": 0.6181730628013611 }, { "compression/movement_sparsity/importance_regularization_factor": 0.44387067072633113, "compression/movement_sparsity/importance_threshold": -0.003894982347629808, "compression/movement_sparsity/linear_layer_sparsity": 0.3530277536432625, "compression/movement_sparsity/model_sparsity": 0.3409001642652162, "compression_loss": 47.638206481933594, "distillation_loss": 1.110093593597412, "epoch": 2.53, "learning_rate": 4.147647224570302e-05, "loss": 48.7818, "step": 2998, "task_loss": 1.2993837594985962 }, { "compression/movement_sparsity/importance_regularization_factor": 0.4444416453524378, "compression/movement_sparsity/importance_threshold": -0.003890983392759842, "compression/movement_sparsity/linear_layer_sparsity": 0.35370602414672564, "compression/movement_sparsity/model_sparsity": 0.34155513409028126, "compression_loss": 47.69911575317383, "distillation_loss": 1.4712867736816406, "epoch": 2.53, "learning_rate": 4.14717760871607e-05, "loss": 48.8644, "step": 2999, "task_loss": 0.6597622632980347 }, { "compression/movement_sparsity/importance_regularization_factor": 0.445012229033934, "compression/movement_sparsity/importance_threshold": -0.0038869871759622567, "compression/movement_sparsity/linear_layer_sparsity": 0.35441882703966227, "compression/movement_sparsity/model_sparsity": 0.3422434500110073, "compression_loss": 47.760009765625, "distillation_loss": 1.3652691841125488, "epoch": 2.54, "learning_rate": 4.146707992861839e-05, "loss": 48.9519, "step": 3000, "task_loss": 1.496319055557251 }, { "epoch": 2.54, "eval_accuracy": 0.8596831683168317, "eval_loss": 48.44996643066406, "eval_runtime": 228.2323, "eval_samples_per_second": 110.633, "eval_steps_per_second": 0.868, "step": 3000 }, { "compression/movement_sparsity/importance_regularization_factor": 0.44558242190470465, "compression/movement_sparsity/importance_threshold": -0.0038829936962993575, "compression/movement_sparsity/linear_layer_sparsity": 0.3551592940015141, "compression/movement_sparsity/model_sparsity": 0.3429584796547767, "compression_loss": 47.820838928222656, "distillation_loss": 1.1190260648727417, "epoch": 2.54, "learning_rate": 4.146238377007608e-05, "loss": 48.8438, "step": 3001, "task_loss": 0.5882004499435425 }, { "compression/movement_sparsity/importance_regularization_factor": 0.446152224098635, "compression/movement_sparsity/importance_threshold": -0.0038790029528334483, "compression/movement_sparsity/linear_layer_sparsity": 0.35590436369207334, "compression/movement_sparsity/model_sparsity": 0.3436779539093628, "compression_loss": 47.881649017333984, "distillation_loss": 1.0594462156295776, "epoch": 2.54, "learning_rate": 4.145768761153377e-05, "loss": 49.1306, "step": 3002, "task_loss": 1.8961745500564575 }, { "compression/movement_sparsity/importance_regularization_factor": 0.4467216357496101, "compression/movement_sparsity/importance_threshold": -0.0038750149446268333, "compression/movement_sparsity/linear_layer_sparsity": 0.35665101929692816, "compression/movement_sparsity/model_sparsity": 0.34439895959720956, "compression_loss": 47.94240951538086, "distillation_loss": 0.5419052839279175, "epoch": 2.54, "learning_rate": 4.145299145299146e-05, "loss": 49.1317, "step": 3003, "task_loss": 1.178877592086792 }, { "compression/movement_sparsity/importance_regularization_factor": 0.44729065699151516, "compression/movement_sparsity/importance_threshold": -0.0038710296707418156, "compression/movement_sparsity/linear_layer_sparsity": 0.35721275491008603, "compression/movement_sparsity/model_sparsity": 0.34494139786395456, "compression_loss": 48.00312805175781, "distillation_loss": 1.1679743528366089, "epoch": 2.54, "learning_rate": 4.144829529444914e-05, "loss": 49.3554, "step": 3004, "task_loss": 1.26522696018219 }, { "compression/movement_sparsity/importance_regularization_factor": 0.4478592879582354, "compression/movement_sparsity/importance_threshold": -0.0038670471302406995, "compression/movement_sparsity/linear_layer_sparsity": 0.3578508648169516, "compression/movement_sparsity/model_sparsity": 0.3455575867324636, "compression_loss": 48.063846588134766, "distillation_loss": 0.7495870590209961, "epoch": 2.54, "learning_rate": 4.144359913590683e-05, "loss": 48.8428, "step": 3005, "task_loss": 1.313217282295227 }, { "compression/movement_sparsity/importance_regularization_factor": 0.4484275287836559, "compression/movement_sparsity/importance_threshold": -0.003863067322185789, "compression/movement_sparsity/linear_layer_sparsity": 0.35861726684341144, "compression/movement_sparsity/model_sparsity": 0.34629766049158617, "compression_loss": 48.124446868896484, "distillation_loss": 1.5468318462371826, "epoch": 2.54, "learning_rate": 4.143890297736452e-05, "loss": 49.4221, "step": 3006, "task_loss": 1.7378158569335938 }, { "compression/movement_sparsity/importance_regularization_factor": 0.4489953796016617, "compression/movement_sparsity/importance_threshold": -0.0038590902456393885, "compression/movement_sparsity/linear_layer_sparsity": 0.35940969932782035, "compression/movement_sparsity/model_sparsity": 0.3470628704823482, "compression_loss": 48.185035705566406, "distillation_loss": 0.7872080206871033, "epoch": 2.54, "learning_rate": 4.143420681882221e-05, "loss": 48.995, "step": 3007, "task_loss": 0.330612450838089 }, { "compression/movement_sparsity/importance_regularization_factor": 0.4495628405461377, "compression/movement_sparsity/importance_threshold": -0.0038551158996638036, "compression/movement_sparsity/linear_layer_sparsity": 0.3601791658653626, "compression/movement_sparsity/model_sparsity": 0.3478059034771699, "compression_loss": 48.24555587768555, "distillation_loss": 1.0740365982055664, "epoch": 2.54, "learning_rate": 4.142951066027989e-05, "loss": 49.3152, "step": 3008, "task_loss": 1.4286096096038818 }, { "compression/movement_sparsity/importance_regularization_factor": 0.4501299117509697, "compression/movement_sparsity/importance_threshold": -0.003851144283321334, "compression/movement_sparsity/linear_layer_sparsity": 0.36090744632789057, "compression/movement_sparsity/model_sparsity": 0.34850916526535725, "compression_loss": 48.30609893798828, "distillation_loss": 0.64019775390625, "epoch": 2.54, "learning_rate": 4.142481450173758e-05, "loss": 49.0841, "step": 3009, "task_loss": 0.7700440883636475 }, { "compression/movement_sparsity/importance_regularization_factor": 0.45069659335004253, "compression/movement_sparsity/importance_threshold": -0.0038471753956742867, "compression/movement_sparsity/linear_layer_sparsity": 0.36162944275407444, "compression/movement_sparsity/model_sparsity": 0.3492063588931809, "compression_loss": 48.36655807495117, "distillation_loss": 0.82325279712677, "epoch": 2.54, "learning_rate": 4.142011834319527e-05, "loss": 49.1442, "step": 3010, "task_loss": 0.943035364151001 }, { "compression/movement_sparsity/importance_regularization_factor": 0.45126288547724125, "compression/movement_sparsity/importance_threshold": -0.0038432092357849647, "compression/movement_sparsity/linear_layer_sparsity": 0.3622767938908578, "compression/movement_sparsity/model_sparsity": 0.3498314715269307, "compression_loss": 48.426971435546875, "distillation_loss": 0.7042733430862427, "epoch": 2.54, "learning_rate": 4.1415422184652956e-05, "loss": 49.4364, "step": 3011, "task_loss": 0.8579145073890686 }, { "compression/movement_sparsity/importance_regularization_factor": 0.4518287882664511, "compression/movement_sparsity/importance_threshold": -0.0038392458027156715, "compression/movement_sparsity/linear_layer_sparsity": 0.36308425082648793, "compression/movement_sparsity/model_sparsity": 0.3506111898327939, "compression_loss": 48.48737335205078, "distillation_loss": 0.6257011890411377, "epoch": 2.55, "learning_rate": 4.141072602611064e-05, "loss": 49.236, "step": 3012, "task_loss": 0.36222517490386963 }, { "compression/movement_sparsity/importance_regularization_factor": 0.45239430185155705, "compression/movement_sparsity/importance_threshold": -0.0038352850955287138, "compression/movement_sparsity/linear_layer_sparsity": 0.363910440629474, "compression/movement_sparsity/model_sparsity": 0.3514089974743902, "compression_loss": 48.54767990112305, "distillation_loss": 1.6978936195373535, "epoch": 2.55, "learning_rate": 4.140602986756833e-05, "loss": 49.9977, "step": 3013, "task_loss": 0.5435124635696411 }, { "compression/movement_sparsity/importance_regularization_factor": 0.4529594263664445, "compression/movement_sparsity/importance_threshold": -0.003831327113286392, "compression/movement_sparsity/linear_layer_sparsity": 0.3645647316318214, "compression/movement_sparsity/model_sparsity": 0.35204081156797246, "compression_loss": 48.60795211791992, "distillation_loss": 1.3760325908660889, "epoch": 2.55, "learning_rate": 4.140133370902602e-05, "loss": 49.7061, "step": 3014, "task_loss": 0.5093219876289368 }, { "compression/movement_sparsity/importance_regularization_factor": 0.45352416194499834, "compression/movement_sparsity/importance_threshold": -0.003827371855051012, "compression/movement_sparsity/linear_layer_sparsity": 0.3653375130879664, "compression/movement_sparsity/model_sparsity": 0.3527870456037451, "compression_loss": 48.668235778808594, "distillation_loss": 0.7905077934265137, "epoch": 2.55, "learning_rate": 4.139663755048371e-05, "loss": 49.5013, "step": 3015, "task_loss": 0.4139029383659363 }, { "compression/movement_sparsity/importance_regularization_factor": 0.45408850872110396, "compression/movement_sparsity/importance_threshold": -0.0038234193198848775, "compression/movement_sparsity/linear_layer_sparsity": 0.36603408718875063, "compression/movement_sparsity/model_sparsity": 0.35345969024125473, "compression_loss": 48.72843551635742, "distillation_loss": 1.47310471534729, "epoch": 2.55, "learning_rate": 4.1391941391941394e-05, "loss": 50.0369, "step": 3016, "task_loss": 1.5562087297439575 }, { "compression/movement_sparsity/importance_regularization_factor": 0.45465246682864624, "compression/movement_sparsity/importance_threshold": -0.003819469506850293, "compression/movement_sparsity/linear_layer_sparsity": 0.36682802211828164, "compression/movement_sparsity/model_sparsity": 0.35422635106352685, "compression_loss": 48.78861618041992, "distillation_loss": 0.8547554016113281, "epoch": 2.55, "learning_rate": 4.138724523339908e-05, "loss": 49.8, "step": 3017, "task_loss": 1.096545696258545 }, { "compression/movement_sparsity/importance_regularization_factor": 0.4552160364015102, "compression/movement_sparsity/importance_threshold": -0.0038155224150095636, "compression/movement_sparsity/linear_layer_sparsity": 0.36765699025232684, "compression/movement_sparsity/model_sparsity": 0.35502684159196335, "compression_loss": 48.848758697509766, "distillation_loss": 0.8751019239425659, "epoch": 2.55, "learning_rate": 4.138254907485677e-05, "loss": 49.6146, "step": 3018, "task_loss": 0.6735438704490662 }, { "compression/movement_sparsity/importance_regularization_factor": 0.45577921757358153, "compression/movement_sparsity/importance_threshold": -0.0038115780434249897, "compression/movement_sparsity/linear_layer_sparsity": 0.3684541685554548, "compression/movement_sparsity/model_sparsity": 0.35579663436797154, "compression_loss": 48.908836364746094, "distillation_loss": 1.1700892448425293, "epoch": 2.55, "learning_rate": 4.137785291631446e-05, "loss": 49.8204, "step": 3019, "task_loss": 1.5246810913085938 }, { "compression/movement_sparsity/importance_regularization_factor": 0.456342010478745, "compression/movement_sparsity/importance_threshold": -0.003807636391158877, "compression/movement_sparsity/linear_layer_sparsity": 0.3693198273533156, "compression/movement_sparsity/model_sparsity": 0.35663255512304787, "compression_loss": 48.96885681152344, "distillation_loss": 0.6335422992706299, "epoch": 2.55, "learning_rate": 4.1373156757772146e-05, "loss": 49.8475, "step": 3020, "task_loss": 0.4357015788555145 }, { "compression/movement_sparsity/importance_regularization_factor": 0.4569044152508859, "compression/movement_sparsity/importance_threshold": -0.0038036974572735302, "compression/movement_sparsity/linear_layer_sparsity": 0.37017668611546106, "compression/movement_sparsity/model_sparsity": 0.35745997815070774, "compression_loss": 49.02882766723633, "distillation_loss": 0.8161166906356812, "epoch": 2.55, "learning_rate": 4.136846059922983e-05, "loss": 50.0513, "step": 3021, "task_loss": 0.6460744142532349 }, { "compression/movement_sparsity/importance_regularization_factor": 0.45746643202388926, "compression/movement_sparsity/importance_threshold": -0.003799761240831252, "compression/movement_sparsity/linear_layer_sparsity": 0.3710223003875259, "compression/movement_sparsity/model_sparsity": 0.3582765429711134, "compression_loss": 49.08876037597656, "distillation_loss": 0.9189770221710205, "epoch": 2.55, "learning_rate": 4.136376444068752e-05, "loss": 50.2145, "step": 3022, "task_loss": 1.298644781112671 }, { "compression/movement_sparsity/importance_regularization_factor": 0.45802806093164017, "compression/movement_sparsity/importance_threshold": -0.0037958277408943484, "compression/movement_sparsity/linear_layer_sparsity": 0.3716790954650769, "compression/movement_sparsity/model_sparsity": 0.35891077511721253, "compression_loss": 49.14867401123047, "distillation_loss": 0.6936419010162354, "epoch": 2.56, "learning_rate": 4.1359068282145205e-05, "loss": 49.9848, "step": 3023, "task_loss": 0.2733449935913086 }, { "compression/movement_sparsity/importance_regularization_factor": 0.45858930210802396, "compression/movement_sparsity/importance_threshold": -0.0037918969565251207, "compression/movement_sparsity/linear_layer_sparsity": 0.37237336820150735, "compression/movement_sparsity/model_sparsity": 0.3595811974493138, "compression_loss": 49.208499908447266, "distillation_loss": 1.0767335891723633, "epoch": 2.56, "learning_rate": 4.13543721236029e-05, "loss": 50.1063, "step": 3024, "task_loss": 0.5466818809509277 }, { "compression/movement_sparsity/importance_regularization_factor": 0.4591501556869255, "compression/movement_sparsity/importance_threshold": -0.0037879688867858757, "compression/movement_sparsity/linear_layer_sparsity": 0.3731552954942291, "compression/movement_sparsity/model_sparsity": 0.3603362631340409, "compression_loss": 49.26829147338867, "distillation_loss": 0.908270001411438, "epoch": 2.56, "learning_rate": 4.134967596506058e-05, "loss": 50.0475, "step": 3025, "task_loss": 0.8433714509010315 }, { "compression/movement_sparsity/importance_regularization_factor": 0.4597106218022302, "compression/movement_sparsity/importance_threshold": -0.0037840435307389154, "compression/movement_sparsity/linear_layer_sparsity": 0.37391599776855894, "compression/movement_sparsity/model_sparsity": 0.3610708329450536, "compression_loss": 49.32806396484375, "distillation_loss": 0.8883118629455566, "epoch": 2.56, "learning_rate": 4.134497980651827e-05, "loss": 50.2666, "step": 3026, "task_loss": 1.3919849395751953 }, { "compression/movement_sparsity/importance_regularization_factor": 0.46027070058782316, "compression/movement_sparsity/importance_threshold": -0.003780120887446544, "compression/movement_sparsity/linear_layer_sparsity": 0.37479120782469605, "compression/movement_sparsity/model_sparsity": 0.3619159768433013, "compression_loss": 49.387760162353516, "distillation_loss": 0.9273070096969604, "epoch": 2.56, "learning_rate": 4.134028364797596e-05, "loss": 50.3281, "step": 3027, "task_loss": 0.4517025649547577 }, { "compression/movement_sparsity/importance_regularization_factor": 0.46083039217758914, "compression/movement_sparsity/importance_threshold": -0.0037762009559710675, "compression/movement_sparsity/linear_layer_sparsity": 0.3756021466172758, "compression/movement_sparsity/model_sparsity": 0.3626990573936164, "compression_loss": 49.447383880615234, "distillation_loss": 0.6709448099136353, "epoch": 2.56, "learning_rate": 4.1335587489433644e-05, "loss": 50.7968, "step": 3028, "task_loss": 0.4689825177192688 }, { "compression/movement_sparsity/importance_regularization_factor": 0.46138969670541374, "compression/movement_sparsity/importance_threshold": -0.0037722837353747875, "compression/movement_sparsity/linear_layer_sparsity": 0.3764737436506192, "compression/movement_sparsity/model_sparsity": 0.3635407123875184, "compression_loss": 49.5069694519043, "distillation_loss": 0.9528563022613525, "epoch": 2.56, "learning_rate": 4.133089133089133e-05, "loss": 50.6858, "step": 3029, "task_loss": 0.37805211544036865 }, { "compression/movement_sparsity/importance_regularization_factor": 0.4619486143051821, "compression/movement_sparsity/importance_threshold": -0.0037683692247200075, "compression/movement_sparsity/linear_layer_sparsity": 0.3771501539839312, "compression/movement_sparsity/model_sparsity": 0.3641938859449995, "compression_loss": 49.5665397644043, "distillation_loss": 1.7192420959472656, "epoch": 2.56, "learning_rate": 4.1326195172349016e-05, "loss": 50.7692, "step": 3030, "task_loss": 0.5860130190849304 }, { "compression/movement_sparsity/importance_regularization_factor": 0.46250714511077906, "compression/movement_sparsity/importance_threshold": -0.003764457423069034, "compression/movement_sparsity/linear_layer_sparsity": 0.37780412303375244, "compression/movement_sparsity/model_sparsity": 0.3648253891461153, "compression_loss": 49.62607955932617, "distillation_loss": 1.0017911195755005, "epoch": 2.56, "learning_rate": 4.132149901380671e-05, "loss": 50.4827, "step": 3031, "task_loss": 1.1313321590423584 }, { "compression/movement_sparsity/importance_regularization_factor": 0.46306528925609003, "compression/movement_sparsity/importance_threshold": -0.0037605483294841685, "compression/movement_sparsity/linear_layer_sparsity": 0.37836743263703826, "compression/movement_sparsity/model_sparsity": 0.3653693473315852, "compression_loss": 49.685585021972656, "distillation_loss": 1.3471577167510986, "epoch": 2.56, "learning_rate": 4.1316802855264396e-05, "loss": 50.8951, "step": 3032, "task_loss": 1.1773598194122314 }, { "compression/movement_sparsity/importance_regularization_factor": 0.463623046875, "compression/movement_sparsity/importance_threshold": -0.0037566419430277165, "compression/movement_sparsity/linear_layer_sparsity": 0.3791244861160716, "compression/movement_sparsity/model_sparsity": 0.3661003936946448, "compression_loss": 49.74504470825195, "distillation_loss": 0.7267760038375854, "epoch": 2.56, "learning_rate": 4.131210669672209e-05, "loss": 50.8365, "step": 3033, "task_loss": 1.2858234643936157 }, { "compression/movement_sparsity/importance_regularization_factor": 0.4641804181013941, "compression/movement_sparsity/importance_threshold": -0.003752738262761982, "compression/movement_sparsity/linear_layer_sparsity": 0.3800208854180976, "compression/movement_sparsity/model_sparsity": 0.3669659989229994, "compression_loss": 49.804466247558594, "distillation_loss": 1.079267978668213, "epoch": 2.56, "learning_rate": 4.130741053817977e-05, "loss": 50.9071, "step": 3034, "task_loss": 1.036117434501648 }, { "compression/movement_sparsity/importance_regularization_factor": 0.46473740306915756, "compression/movement_sparsity/importance_threshold": -0.003748837287749268, "compression/movement_sparsity/linear_layer_sparsity": 0.3807544721352235, "compression/movement_sparsity/model_sparsity": 0.36767438467961533, "compression_loss": 49.863868713378906, "distillation_loss": 0.864710807800293, "epoch": 2.57, "learning_rate": 4.130271437963746e-05, "loss": 50.7787, "step": 3035, "task_loss": 0.7965424060821533 }, { "compression/movement_sparsity/importance_regularization_factor": 0.4652940019121755, "compression/movement_sparsity/importance_threshold": -0.003744939017051879, "compression/movement_sparsity/linear_layer_sparsity": 0.38147685013477173, "compression/movement_sparsity/model_sparsity": 0.36837194677258445, "compression_loss": 49.92317199707031, "distillation_loss": 1.1272435188293457, "epoch": 2.57, "learning_rate": 4.129801822109515e-05, "loss": 50.9183, "step": 3036, "task_loss": 1.9793462753295898 }, { "compression/movement_sparsity/importance_regularization_factor": 0.46585021476433297, "compression/movement_sparsity/importance_threshold": -0.00374104344973212, "compression/movement_sparsity/linear_layer_sparsity": 0.38217776463257536, "compression/movement_sparsity/model_sparsity": 0.36904878270112335, "compression_loss": 49.982486724853516, "distillation_loss": 0.7878408432006836, "epoch": 2.57, "learning_rate": 4.1293322062552834e-05, "loss": 51.0231, "step": 3037, "task_loss": 1.057770013809204 }, { "compression/movement_sparsity/importance_regularization_factor": 0.4664060417595153, "compression/movement_sparsity/importance_threshold": -0.003737150584852293, "compression/movement_sparsity/linear_layer_sparsity": 0.3828524340374125, "compression/movement_sparsity/model_sparsity": 0.36970027513637843, "compression_loss": 50.04176712036133, "distillation_loss": 0.6824671030044556, "epoch": 2.57, "learning_rate": 4.128862590401052e-05, "loss": 50.9101, "step": 3038, "task_loss": 0.7235848307609558 }, { "compression/movement_sparsity/importance_regularization_factor": 0.4669614830316071, "compression/movement_sparsity/importance_threshold": -0.0037332604214747054, "compression/movement_sparsity/linear_layer_sparsity": 0.38356084883665914, "compression/movement_sparsity/model_sparsity": 0.3703843537079321, "compression_loss": 50.10099411010742, "distillation_loss": 2.0175516605377197, "epoch": 2.57, "learning_rate": 4.128392974546821e-05, "loss": 51.5324, "step": 3039, "task_loss": 1.3853259086608887 }, { "compression/movement_sparsity/importance_regularization_factor": 0.4675165387144943, "compression/movement_sparsity/importance_threshold": -0.003729372958661657, "compression/movement_sparsity/linear_layer_sparsity": 0.3843623317643037, "compression/movement_sparsity/model_sparsity": 0.3711583032313622, "compression_loss": 50.160194396972656, "distillation_loss": 1.2447974681854248, "epoch": 2.57, "learning_rate": 4.12792335869259e-05, "loss": 51.314, "step": 3040, "task_loss": 1.5853455066680908 }, { "compression/movement_sparsity/importance_regularization_factor": 0.46807120894206156, "compression/movement_sparsity/importance_threshold": -0.0037254881954754534, "compression/movement_sparsity/linear_layer_sparsity": 0.38501275933633705, "compression/movement_sparsity/model_sparsity": 0.371786386615347, "compression_loss": 50.21932601928711, "distillation_loss": 0.7990949153900146, "epoch": 2.57, "learning_rate": 4.1274537428383586e-05, "loss": 51.0394, "step": 3041, "task_loss": 0.6759595274925232 }, { "compression/movement_sparsity/importance_regularization_factor": 0.4686254938481941, "compression/movement_sparsity/importance_threshold": -0.0037216061309783993, "compression/movement_sparsity/linear_layer_sparsity": 0.3857994682202808, "compression/movement_sparsity/model_sparsity": 0.3725460696289276, "compression_loss": 50.27841567993164, "distillation_loss": 1.5597389936447144, "epoch": 2.57, "learning_rate": 4.126984126984127e-05, "loss": 51.5301, "step": 3042, "task_loss": 0.8850700855255127 }, { "compression/movement_sparsity/importance_regularization_factor": 0.4691793935667772, "compression/movement_sparsity/importance_threshold": -0.003717726764232797, "compression/movement_sparsity/linear_layer_sparsity": 0.38665283320130894, "compression/movement_sparsity/model_sparsity": 0.37337011889759975, "compression_loss": 50.33747100830078, "distillation_loss": 1.25918447971344, "epoch": 2.57, "learning_rate": 4.126514511129896e-05, "loss": 51.408, "step": 3043, "task_loss": 1.4573487043380737 }, { "compression/movement_sparsity/importance_regularization_factor": 0.4697329082316958, "compression/movement_sparsity/importance_threshold": -0.0037138500943009523, "compression/movement_sparsity/linear_layer_sparsity": 0.38737806107692213, "compression/movement_sparsity/model_sparsity": 0.3740704329646237, "compression_loss": 50.39646911621094, "distillation_loss": 1.6351810693740845, "epoch": 2.57, "learning_rate": 4.1260448952756645e-05, "loss": 51.6377, "step": 3044, "task_loss": 1.0610618591308594 }, { "compression/movement_sparsity/importance_regularization_factor": 0.4702860379768351, "compression/movement_sparsity/importance_threshold": -0.0037099761202451687, "compression/movement_sparsity/linear_layer_sparsity": 0.3881967148059623, "compression/movement_sparsity/model_sparsity": 0.3748609634195979, "compression_loss": 50.455448150634766, "distillation_loss": 1.4475548267364502, "epoch": 2.57, "learning_rate": 4.125575279421434e-05, "loss": 51.6234, "step": 3045, "task_loss": 1.4041566848754883 }, { "compression/movement_sparsity/importance_regularization_factor": 0.4708387829360803, "compression/movement_sparsity/importance_threshold": -0.00370610484112775, "compression/movement_sparsity/linear_layer_sparsity": 0.38897395590080314, "compression/movement_sparsity/model_sparsity": 0.3756115038917576, "compression_loss": 50.51437759399414, "distillation_loss": 0.965828537940979, "epoch": 2.57, "learning_rate": 4.125105663567202e-05, "loss": 51.2874, "step": 3046, "task_loss": 0.6727010607719421 }, { "compression/movement_sparsity/importance_regularization_factor": 0.47139114324331644, "compression/movement_sparsity/importance_threshold": -0.003702236256011, "compression/movement_sparsity/linear_layer_sparsity": 0.38978890121370857, "compression/movement_sparsity/model_sparsity": 0.3763984533260997, "compression_loss": 50.57326889038086, "distillation_loss": 0.9208170175552368, "epoch": 2.58, "learning_rate": 4.124636047712971e-05, "loss": 51.3711, "step": 3047, "task_loss": 0.9423845410346985 }, { "compression/movement_sparsity/importance_regularization_factor": 0.4719431190324288, "compression/movement_sparsity/importance_threshold": -0.0036983703639572223, "compression/movement_sparsity/linear_layer_sparsity": 0.3904407000650201, "compression/movement_sparsity/model_sparsity": 0.3770278608817009, "compression_loss": 50.63212585449219, "distillation_loss": 1.2266002893447876, "epoch": 2.58, "learning_rate": 4.12416643185874e-05, "loss": 51.55, "step": 3048, "task_loss": 1.8993918895721436 }, { "compression/movement_sparsity/importance_regularization_factor": 0.4724947104373022, "compression/movement_sparsity/importance_threshold": -0.0036945071640287234, "compression/movement_sparsity/linear_layer_sparsity": 0.39135028749645134, "compression/movement_sparsity/model_sparsity": 0.3779062011866443, "compression_loss": 50.69087219238281, "distillation_loss": 1.4201915264129639, "epoch": 2.58, "learning_rate": 4.1236968160045084e-05, "loss": 51.8787, "step": 3049, "task_loss": 1.2274408340454102 }, { "compression/movement_sparsity/importance_regularization_factor": 0.47304591759182224, "compression/movement_sparsity/importance_threshold": -0.0036906466552878037, "compression/movement_sparsity/linear_layer_sparsity": 0.392159330346377, "compression/movement_sparsity/model_sparsity": 0.3786874509257681, "compression_loss": 50.749637603759766, "distillation_loss": 1.0182191133499146, "epoch": 2.58, "learning_rate": 4.123227200150278e-05, "loss": 52.0471, "step": 3050, "task_loss": 1.4530220031738281 }, { "compression/movement_sparsity/importance_regularization_factor": 0.4735967406298738, "compression/movement_sparsity/importance_threshold": -0.0036867888367967693, "compression/movement_sparsity/linear_layer_sparsity": 0.3931106762128691, "compression/movement_sparsity/model_sparsity": 0.3796061151350639, "compression_loss": 50.8083381652832, "distillation_loss": 0.9240841269493103, "epoch": 2.58, "learning_rate": 4.122757584296046e-05, "loss": 51.8435, "step": 3051, "task_loss": 0.48819324374198914 }, { "compression/movement_sparsity/importance_regularization_factor": 0.47414717968534215, "compression/movement_sparsity/importance_threshold": -0.003682933707617923, "compression/movement_sparsity/linear_layer_sparsity": 0.3937904491614544, "compression/movement_sparsity/model_sparsity": 0.38026253579163916, "compression_loss": 50.86696243286133, "distillation_loss": 1.5159823894500732, "epoch": 2.58, "learning_rate": 4.122287968441815e-05, "loss": 52.0238, "step": 3052, "task_loss": 1.1704357862472534 }, { "compression/movement_sparsity/importance_regularization_factor": 0.47469723489211224, "compression/movement_sparsity/importance_threshold": -0.0036790812668135693, "compression/movement_sparsity/linear_layer_sparsity": 0.3945180141739242, "compression/movement_sparsity/model_sparsity": 0.38096510670767886, "compression_loss": 50.92556381225586, "distillation_loss": 1.1936275959014893, "epoch": 2.58, "learning_rate": 4.1218183525875836e-05, "loss": 51.7775, "step": 3053, "task_loss": 0.8866280317306519 }, { "compression/movement_sparsity/importance_regularization_factor": 0.4752469063840694, "compression/movement_sparsity/importance_threshold": -0.0036752315134460124, "compression/movement_sparsity/linear_layer_sparsity": 0.39528418964119894, "compression/movement_sparsity/model_sparsity": 0.38170496169062124, "compression_loss": 50.9841194152832, "distillation_loss": 0.818263828754425, "epoch": 2.58, "learning_rate": 4.121348736733352e-05, "loss": 52.0774, "step": 3054, "task_loss": 1.1661381721496582 }, { "compression/movement_sparsity/importance_regularization_factor": 0.4757961942950987, "compression/movement_sparsity/importance_threshold": -0.0036713844465775564, "compression/movement_sparsity/linear_layer_sparsity": 0.3960466089956684, "compression/movement_sparsity/model_sparsity": 0.3824411895947884, "compression_loss": 51.04262161254883, "distillation_loss": 1.4447181224822998, "epoch": 2.58, "learning_rate": 4.120879120879121e-05, "loss": 52.3121, "step": 3055, "task_loss": 1.1622289419174194 }, { "compression/movement_sparsity/importance_regularization_factor": 0.47634509875908515, "compression/movement_sparsity/importance_threshold": -0.0036675400652705055, "compression/movement_sparsity/linear_layer_sparsity": 0.39678727866837, "compression/movement_sparsity/model_sparsity": 0.3831564149856663, "compression_loss": 51.10111618041992, "distillation_loss": 0.9131793975830078, "epoch": 2.58, "learning_rate": 4.1204095050248895e-05, "loss": 52.1258, "step": 3056, "task_loss": 0.8300709128379822 }, { "compression/movement_sparsity/importance_regularization_factor": 0.4768936199099141, "compression/movement_sparsity/importance_threshold": -0.003663698368587163, "compression/movement_sparsity/linear_layer_sparsity": 0.3974621984807275, "compression/movement_sparsity/model_sparsity": 0.3838081492261731, "compression_loss": 51.15959167480469, "distillation_loss": 0.8873772621154785, "epoch": 2.58, "learning_rate": 4.119939889170659e-05, "loss": 52.231, "step": 3057, "task_loss": 0.9160637259483337 }, { "compression/movement_sparsity/importance_regularization_factor": 0.4774417578814706, "compression/movement_sparsity/importance_threshold": -0.0036598593555898324, "compression/movement_sparsity/linear_layer_sparsity": 0.3983995096019121, "compression/movement_sparsity/model_sparsity": 0.38471326082683877, "compression_loss": 51.21798324584961, "distillation_loss": 1.5619075298309326, "epoch": 2.58, "learning_rate": 4.1194702733164275e-05, "loss": 52.5645, "step": 3058, "task_loss": 1.3484995365142822 }, { "compression/movement_sparsity/importance_regularization_factor": 0.47798951280763957, "compression/movement_sparsity/importance_threshold": -0.00365602302534082, "compression/movement_sparsity/linear_layer_sparsity": 0.3990665475394623, "compression/movement_sparsity/model_sparsity": 0.3853573839591854, "compression_loss": 51.2763557434082, "distillation_loss": 0.710129976272583, "epoch": 2.59, "learning_rate": 4.119000657462196e-05, "loss": 52.3275, "step": 3059, "task_loss": 0.647619903087616 }, { "compression/movement_sparsity/importance_regularization_factor": 0.4785368848223065, "compression/movement_sparsity/importance_threshold": -0.003652189376902428, "compression/movement_sparsity/linear_layer_sparsity": 0.4000348495723326, "compression/movement_sparsity/model_sparsity": 0.3862924218383811, "compression_loss": 51.33466339111328, "distillation_loss": 0.5760276317596436, "epoch": 2.59, "learning_rate": 4.118531041607965e-05, "loss": 52.5166, "step": 3060, "task_loss": 1.2770187854766846 }, { "compression/movement_sparsity/importance_regularization_factor": 0.4790838740593565, "compression/movement_sparsity/importance_threshold": -0.0036483584093369593, "compression/movement_sparsity/linear_layer_sparsity": 0.4006868511344939, "compression/movement_sparsity/model_sparsity": 0.38692202514109075, "compression_loss": 51.39292907714844, "distillation_loss": 0.5586639046669006, "epoch": 2.59, "learning_rate": 4.1180614257537334e-05, "loss": 52.1561, "step": 3061, "task_loss": 0.3985616862773895 }, { "compression/movement_sparsity/importance_regularization_factor": 0.47963048065267455, "compression/movement_sparsity/importance_threshold": -0.0036445301217067193, "compression/movement_sparsity/linear_layer_sparsity": 0.40141410611860523, "compression/movement_sparsity/model_sparsity": 0.38762429667919984, "compression_loss": 51.4511833190918, "distillation_loss": 1.1474469900131226, "epoch": 2.59, "learning_rate": 4.117591809899503e-05, "loss": 52.6888, "step": 3062, "task_loss": 0.9431793689727783 }, { "compression/movement_sparsity/importance_regularization_factor": 0.48017670473614593, "compression/movement_sparsity/importance_threshold": -0.0036407045130740115, "compression/movement_sparsity/linear_layer_sparsity": 0.40221552942541156, "compression/movement_sparsity/model_sparsity": 0.3883981886299509, "compression_loss": 51.5093879699707, "distillation_loss": 1.1265697479248047, "epoch": 2.59, "learning_rate": 4.117122194045271e-05, "loss": 53.1513, "step": 3063, "task_loss": 2.025576114654541 }, { "compression/movement_sparsity/importance_regularization_factor": 0.48072254644365564, "compression/movement_sparsity/importance_threshold": -0.003636881582501141, "compression/movement_sparsity/linear_layer_sparsity": 0.4029224537037037, "compression/movement_sparsity/model_sparsity": 0.38908082788453024, "compression_loss": 51.56752395629883, "distillation_loss": 1.0329691171646118, "epoch": 2.59, "learning_rate": 4.11665257819104e-05, "loss": 52.7553, "step": 3064, "task_loss": 1.479643702507019 }, { "compression/movement_sparsity/importance_regularization_factor": 0.4812680059090889, "compression/movement_sparsity/importance_threshold": -0.0036330613290504105, "compression/movement_sparsity/linear_layer_sparsity": 0.4036648643048802, "compression/movement_sparsity/model_sparsity": 0.38979773439763415, "compression_loss": 51.62565231323242, "distillation_loss": 2.521170139312744, "epoch": 2.59, "learning_rate": 4.1161829623368086e-05, "loss": 53.0655, "step": 3065, "task_loss": 2.259763479232788 }, { "compression/movement_sparsity/importance_regularization_factor": 0.4818130832663309, "compression/movement_sparsity/importance_threshold": -0.003629243751784124, "compression/movement_sparsity/linear_layer_sparsity": 0.40445003497119886, "compression/movement_sparsity/model_sparsity": 0.3905559320360973, "compression_loss": 51.683719635009766, "distillation_loss": 0.7968692183494568, "epoch": 2.59, "learning_rate": 4.115713346482578e-05, "loss": 52.5418, "step": 3066, "task_loss": 0.3419604003429413 }, { "compression/movement_sparsity/importance_regularization_factor": 0.48235777864926654, "compression/movement_sparsity/importance_threshold": -0.0036254288497645872, "compression/movement_sparsity/linear_layer_sparsity": 0.40522869504198833, "compression/movement_sparsity/model_sparsity": 0.3913078427380166, "compression_loss": 51.741756439208984, "distillation_loss": 0.9649976491928101, "epoch": 2.59, "learning_rate": 4.1152437306283465e-05, "loss": 53.0487, "step": 3067, "task_loss": 2.7186105251312256 }, { "compression/movement_sparsity/importance_regularization_factor": 0.48290209219178126, "compression/movement_sparsity/importance_threshold": -0.003621616622054102, "compression/movement_sparsity/linear_layer_sparsity": 0.4060898107319798, "compression/movement_sparsity/model_sparsity": 0.39213937645495517, "compression_loss": 51.79972457885742, "distillation_loss": 1.25900137424469, "epoch": 2.59, "learning_rate": 4.1147741147741145e-05, "loss": 53.079, "step": 3068, "task_loss": 1.3018369674682617 }, { "compression/movement_sparsity/importance_regularization_factor": 0.4834460240277598, "compression/movement_sparsity/importance_threshold": -0.003617807067714975, "compression/movement_sparsity/linear_layer_sparsity": 0.4067707403248258, "compression/movement_sparsity/model_sparsity": 0.3927969140215024, "compression_loss": 51.85771942138672, "distillation_loss": 1.1364296674728394, "epoch": 2.59, "learning_rate": 4.114304498919884e-05, "loss": 52.9511, "step": 3069, "task_loss": 0.5344680547714233 }, { "compression/movement_sparsity/importance_regularization_factor": 0.4839895742910879, "compression/movement_sparsity/importance_threshold": -0.003614000185809506, "compression/movement_sparsity/linear_layer_sparsity": 0.4074406519867762, "compression/movement_sparsity/model_sparsity": 0.3934438121569755, "compression_loss": 51.91560363769531, "distillation_loss": 1.3081576824188232, "epoch": 2.59, "learning_rate": 4.1138348830656524e-05, "loss": 52.9715, "step": 3070, "task_loss": 1.148207426071167 }, { "compression/movement_sparsity/importance_regularization_factor": 0.4845327431156503, "compression/movement_sparsity/importance_threshold": -0.0036101959754000017, "compression/movement_sparsity/linear_layer_sparsity": 0.40820102038441225, "compression/movement_sparsity/model_sparsity": 0.394178059560986, "compression_loss": 51.97350311279297, "distillation_loss": 0.9547584056854248, "epoch": 2.6, "learning_rate": 4.113365267211422e-05, "loss": 53.2431, "step": 3071, "task_loss": 1.34943425655365 }, { "compression/movement_sparsity/importance_regularization_factor": 0.48507553063533226, "compression/movement_sparsity/importance_threshold": -0.003606394435548766, "compression/movement_sparsity/linear_layer_sparsity": 0.40889115543467314, "compression/movement_sparsity/model_sparsity": 0.39484448634916663, "compression_loss": 52.03136444091797, "distillation_loss": 1.7997688055038452, "epoch": 2.6, "learning_rate": 4.11289565135719e-05, "loss": 53.5553, "step": 3072, "task_loss": 1.2421348094940186 }, { "compression/movement_sparsity/importance_regularization_factor": 0.4856179369840188, "compression/movement_sparsity/importance_threshold": -0.0036025955653181028, "compression/movement_sparsity/linear_layer_sparsity": 0.4097182514743995, "compression/movement_sparsity/model_sparsity": 0.39564316909548336, "compression_loss": 52.089202880859375, "distillation_loss": 1.3371782302856445, "epoch": 2.6, "learning_rate": 4.112426035502959e-05, "loss": 53.4844, "step": 3073, "task_loss": 0.8282544612884521 }, { "compression/movement_sparsity/importance_regularization_factor": 0.4861599622955952, "compression/movement_sparsity/importance_threshold": -0.0035987993637703153, "compression/movement_sparsity/linear_layer_sparsity": 0.4104068244587, "compression/movement_sparsity/model_sparsity": 0.39630808747947494, "compression_loss": 52.1469612121582, "distillation_loss": 1.2503807544708252, "epoch": 2.6, "learning_rate": 4.1119564196487276e-05, "loss": 53.2571, "step": 3074, "task_loss": 1.4017322063446045 }, { "compression/movement_sparsity/importance_regularization_factor": 0.48670160670394647, "compression/movement_sparsity/importance_threshold": -0.003595005829967709, "compression/movement_sparsity/linear_layer_sparsity": 0.41106533661639055, "compression/movement_sparsity/model_sparsity": 0.3969439777187284, "compression_loss": 52.2047119140625, "distillation_loss": 1.8732569217681885, "epoch": 2.6, "learning_rate": 4.111486803794496e-05, "loss": 53.2659, "step": 3075, "task_loss": 1.2021640539169312 }, { "compression/movement_sparsity/importance_regularization_factor": 0.4872428703429579, "compression/movement_sparsity/importance_threshold": -0.0035912149629725863, "compression/movement_sparsity/linear_layer_sparsity": 0.4119359200954849, "compression/movement_sparsity/model_sparsity": 0.39778465397708784, "compression_loss": 52.26237106323242, "distillation_loss": 1.1679730415344238, "epoch": 2.6, "learning_rate": 4.111017187940265e-05, "loss": 53.5597, "step": 3076, "task_loss": 1.2722549438476562 }, { "compression/movement_sparsity/importance_regularization_factor": 0.48778375334651447, "compression/movement_sparsity/importance_threshold": -0.003587426761847252, "compression/movement_sparsity/linear_layer_sparsity": 0.41250750507111, "compression/movement_sparsity/model_sparsity": 0.39833660325039916, "compression_loss": 52.32002258300781, "distillation_loss": 0.9417812824249268, "epoch": 2.6, "learning_rate": 4.1105475720860335e-05, "loss": 53.3077, "step": 3077, "task_loss": 1.0897536277770996 }, { "compression/movement_sparsity/importance_regularization_factor": 0.4883242558485015, "compression/movement_sparsity/importance_threshold": -0.003583641225654009, "compression/movement_sparsity/linear_layer_sparsity": 0.4132177681163402, "compression/movement_sparsity/model_sparsity": 0.39902246657500096, "compression_loss": 52.37763214111328, "distillation_loss": 1.0463371276855469, "epoch": 2.6, "learning_rate": 4.110077956231803e-05, "loss": 53.3656, "step": 3078, "task_loss": 1.164680004119873 }, { "compression/movement_sparsity/importance_regularization_factor": 0.4888643779828038, "compression/movement_sparsity/importance_threshold": -0.0035798583534551644, "compression/movement_sparsity/linear_layer_sparsity": 0.41401896486396145, "compression/movement_sparsity/model_sparsity": 0.39979613974957195, "compression_loss": 52.4351692199707, "distillation_loss": 0.8574877381324768, "epoch": 2.6, "learning_rate": 4.1096083403775715e-05, "loss": 53.4403, "step": 3079, "task_loss": 0.6933873891830444 }, { "compression/movement_sparsity/importance_regularization_factor": 0.4894041198833069, "compression/movement_sparsity/importance_threshold": -0.0035760781443130185, "compression/movement_sparsity/linear_layer_sparsity": 0.4147886937331917, "compression/movement_sparsity/model_sparsity": 0.4005394260641812, "compression_loss": 52.4926872253418, "distillation_loss": 2.0362396240234375, "epoch": 2.6, "learning_rate": 4.10913872452334e-05, "loss": 53.9478, "step": 3080, "task_loss": 1.576593279838562 }, { "compression/movement_sparsity/importance_regularization_factor": 0.4899434816838958, "compression/movement_sparsity/importance_threshold": -0.003572300597289877, "compression/movement_sparsity/linear_layer_sparsity": 0.41552476067718586, "compression/movement_sparsity/model_sparsity": 0.4012502068442424, "compression_loss": 52.5501594543457, "distillation_loss": 1.1411570310592651, "epoch": 2.6, "learning_rate": 4.108669108669109e-05, "loss": 53.6176, "step": 3081, "task_loss": 1.3787283897399902 }, { "compression/movement_sparsity/importance_regularization_factor": 0.4904824635184556, "compression/movement_sparsity/importance_threshold": -0.003568525711448043, "compression/movement_sparsity/linear_layer_sparsity": 0.4162437760614608, "compression/movement_sparsity/model_sparsity": 0.4019445218381174, "compression_loss": 52.60761642456055, "distillation_loss": 2.795780658721924, "epoch": 2.6, "learning_rate": 4.1081994928148774e-05, "loss": 54.0081, "step": 3082, "task_loss": 1.7832911014556885 }, { "compression/movement_sparsity/importance_regularization_factor": 0.49102106552087144, "compression/movement_sparsity/importance_threshold": -0.003564753485849822, "compression/movement_sparsity/linear_layer_sparsity": 0.41701823882524247, "compression/movement_sparsity/model_sparsity": 0.40269237942343705, "compression_loss": 52.66501998901367, "distillation_loss": 1.7607216835021973, "epoch": 2.61, "learning_rate": 4.107729876960647e-05, "loss": 54.1451, "step": 3083, "task_loss": 0.8505529761314392 }, { "compression/movement_sparsity/importance_regularization_factor": 0.49155928782502856, "compression/movement_sparsity/importance_threshold": -0.0035609839195575163, "compression/movement_sparsity/linear_layer_sparsity": 0.41772236092414017, "compression/movement_sparsity/model_sparsity": 0.40337231276210467, "compression_loss": 52.72239685058594, "distillation_loss": 1.2642372846603394, "epoch": 2.61, "learning_rate": 4.107260261106415e-05, "loss": 54.0129, "step": 3084, "task_loss": 0.7006797194480896 }, { "compression/movement_sparsity/importance_regularization_factor": 0.49209713056481197, "compression/movement_sparsity/importance_threshold": -0.003557217011633431, "compression/movement_sparsity/linear_layer_sparsity": 0.41842098598175775, "compression/movement_sparsity/model_sparsity": 0.40404693789977103, "compression_loss": 52.779762268066406, "distillation_loss": 1.3705413341522217, "epoch": 2.61, "learning_rate": 4.106790645252184e-05, "loss": 53.8318, "step": 3085, "task_loss": 2.092862129211426 }, { "compression/movement_sparsity/importance_regularization_factor": 0.4926345938741069, "compression/movement_sparsity/importance_threshold": -0.0035534527611398694, "compression/movement_sparsity/linear_layer_sparsity": 0.41915570549480907, "compression/movement_sparsity/model_sparsity": 0.4047564175372874, "compression_loss": 52.83705520629883, "distillation_loss": 1.0804543495178223, "epoch": 2.61, "learning_rate": 4.1063210293979526e-05, "loss": 54.0107, "step": 3086, "task_loss": 0.5854677557945251 }, { "compression/movement_sparsity/importance_regularization_factor": 0.4931716778867985, "compression/movement_sparsity/importance_threshold": -0.0035496911671391356, "compression/movement_sparsity/linear_layer_sparsity": 0.4198811480054397, "compression/movement_sparsity/model_sparsity": 0.4054569388659557, "compression_loss": 52.894283294677734, "distillation_loss": 0.3311106562614441, "epoch": 2.61, "learning_rate": 4.105851413543721e-05, "loss": 53.9928, "step": 3087, "task_loss": 0.21367910504341125 }, { "compression/movement_sparsity/importance_regularization_factor": 0.49370838273677176, "compression/movement_sparsity/importance_threshold": -0.0035459322286935345, "compression/movement_sparsity/linear_layer_sparsity": 0.4205838630525564, "compression/movement_sparsity/model_sparsity": 0.4061355134893996, "compression_loss": 52.951480865478516, "distillation_loss": 0.7947851419448853, "epoch": 2.61, "learning_rate": 4.1053817976894905e-05, "loss": 54.0601, "step": 3088, "task_loss": 0.9420960545539856 }, { "compression/movement_sparsity/importance_regularization_factor": 0.4942447085579118, "compression/movement_sparsity/importance_threshold": -0.0035421759448653702, "compression/movement_sparsity/linear_layer_sparsity": 0.4212521411035407, "compression/movement_sparsity/model_sparsity": 0.4067808341334688, "compression_loss": 53.00864028930664, "distillation_loss": 1.6943480968475342, "epoch": 2.61, "learning_rate": 4.1049121818352585e-05, "loss": 54.5701, "step": 3089, "task_loss": 1.3856816291809082 }, { "compression/movement_sparsity/importance_regularization_factor": 0.4947806554841041, "compression/movement_sparsity/importance_threshold": -0.003538422314716944, "compression/movement_sparsity/linear_layer_sparsity": 0.42210064102417344, "compression/movement_sparsity/model_sparsity": 0.40760018547153676, "compression_loss": 53.06572341918945, "distillation_loss": 1.337015151977539, "epoch": 2.61, "learning_rate": 4.104442565981028e-05, "loss": 54.4026, "step": 3090, "task_loss": 1.0604528188705444 }, { "compression/movement_sparsity/importance_regularization_factor": 0.49531622364923356, "compression/movement_sparsity/importance_threshold": -0.003534671337310563, "compression/movement_sparsity/linear_layer_sparsity": 0.42272626632752425, "compression/movement_sparsity/model_sparsity": 0.40820431862106893, "compression_loss": 53.122798919677734, "distillation_loss": 1.3674101829528809, "epoch": 2.61, "learning_rate": 4.1039729501267964e-05, "loss": 54.068, "step": 3091, "task_loss": 0.5697746872901917 }, { "compression/movement_sparsity/importance_regularization_factor": 0.4958514131871853, "compression/movement_sparsity/importance_threshold": -0.003530923011708529, "compression/movement_sparsity/linear_layer_sparsity": 0.4234179038229072, "compression/movement_sparsity/model_sparsity": 0.4088721962407597, "compression_loss": 53.179805755615234, "distillation_loss": 0.7628318667411804, "epoch": 2.61, "learning_rate": 4.103503334272565e-05, "loss": 54.146, "step": 3092, "task_loss": 0.4658435881137848 }, { "compression/movement_sparsity/importance_regularization_factor": 0.49638622423184464, "compression/movement_sparsity/importance_threshold": -0.0035271773369731467, "compression/movement_sparsity/linear_layer_sparsity": 0.4240316049586222, "compression/movement_sparsity/model_sparsity": 0.4094648148544973, "compression_loss": 53.23674011230469, "distillation_loss": 1.1078250408172607, "epoch": 2.61, "learning_rate": 4.103033718418334e-05, "loss": 54.5533, "step": 3093, "task_loss": 0.37736544013023376 }, { "compression/movement_sparsity/importance_regularization_factor": 0.4969206569170964, "compression/movement_sparsity/importance_threshold": -0.0035234343121667215, "compression/movement_sparsity/linear_layer_sparsity": 0.42496176157922533, "compression/movement_sparsity/model_sparsity": 0.4103630177336863, "compression_loss": 53.293678283691406, "distillation_loss": 1.5750502347946167, "epoch": 2.61, "learning_rate": 4.1025641025641023e-05, "loss": 54.2085, "step": 3094, "task_loss": 1.7453081607818604 }, { "compression/movement_sparsity/importance_regularization_factor": 0.49745471137682606, "compression/movement_sparsity/importance_threshold": -0.003519693936351555, "compression/movement_sparsity/linear_layer_sparsity": 0.42568883770082205, "compression/movement_sparsity/model_sparsity": 0.4110651165537584, "compression_loss": 53.350547790527344, "distillation_loss": 1.0486245155334473, "epoch": 2.62, "learning_rate": 4.1020944867098717e-05, "loss": 54.7001, "step": 3095, "task_loss": 1.222252607345581 }, { "compression/movement_sparsity/importance_regularization_factor": 0.4979883877449186, "compression/movement_sparsity/importance_threshold": -0.0035159562085899524, "compression/movement_sparsity/linear_layer_sparsity": 0.4262994147046164, "compression/movement_sparsity/model_sparsity": 0.4116547183591179, "compression_loss": 53.4073600769043, "distillation_loss": 1.1021398305892944, "epoch": 2.62, "learning_rate": 4.10162487085564e-05, "loss": 54.8112, "step": 3096, "task_loss": 0.9640390276908875 }, { "compression/movement_sparsity/importance_regularization_factor": 0.49852168615525916, "compression/movement_sparsity/importance_threshold": -0.0035122211279442176, "compression/movement_sparsity/linear_layer_sparsity": 0.4270995263529828, "compression/movement_sparsity/model_sparsity": 0.41242734371093154, "compression_loss": 53.464141845703125, "distillation_loss": 1.5863444805145264, "epoch": 2.62, "learning_rate": 4.1011552550014096e-05, "loss": 54.8146, "step": 3097, "task_loss": 1.0937414169311523 }, { "compression/movement_sparsity/importance_regularization_factor": 0.49905460674173285, "compression/movement_sparsity/importance_threshold": -0.0035084886934766547, "compression/movement_sparsity/linear_layer_sparsity": 0.42782007995488275, "compression/movement_sparsity/model_sparsity": 0.41312314407992407, "compression_loss": 53.520896911621094, "distillation_loss": 1.2905917167663574, "epoch": 2.62, "learning_rate": 4.1006856391471776e-05, "loss": 54.5744, "step": 3098, "task_loss": 1.0547055006027222 }, { "compression/movement_sparsity/importance_regularization_factor": 0.4995871496382247, "compression/movement_sparsity/importance_threshold": -0.003504758904249569, "compression/movement_sparsity/linear_layer_sparsity": 0.42862054932827826, "compression/movement_sparsity/model_sparsity": 0.4138961148678116, "compression_loss": 53.57758712768555, "distillation_loss": 2.74406099319458, "epoch": 2.62, "learning_rate": 4.100216023292946e-05, "loss": 55.3062, "step": 3099, "task_loss": 2.2682535648345947 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5001193149786202, "compression/movement_sparsity/importance_threshold": -0.003501031759325262, "compression/movement_sparsity/linear_layer_sparsity": 0.4293903020458438, "compression/movement_sparsity/model_sparsity": 0.41463942421149236, "compression_loss": 53.63422393798828, "distillation_loss": 0.7816734313964844, "epoch": 2.62, "learning_rate": 4.0997464074387155e-05, "loss": 55.0809, "step": 3100, "task_loss": 0.3357618749141693 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5006511028968043, "compression/movement_sparsity/importance_threshold": -0.003497307257766038, "compression/movement_sparsity/linear_layer_sparsity": 0.43018934051912294, "compression/movement_sparsity/model_sparsity": 0.41541101325508456, "compression_loss": 53.690826416015625, "distillation_loss": 2.4887375831604004, "epoch": 2.62, "learning_rate": 4.099276791584484e-05, "loss": 55.239, "step": 3101, "task_loss": 1.9428654909133911 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5011825135266621, "compression/movement_sparsity/importance_threshold": -0.003493585398634202, "compression/movement_sparsity/linear_layer_sparsity": 0.4310135747586167, "compression/movement_sparsity/model_sparsity": 0.4162069325128106, "compression_loss": 53.74742126464844, "distillation_loss": 1.369639277458191, "epoch": 2.62, "learning_rate": 4.098807175730253e-05, "loss": 54.8908, "step": 3102, "task_loss": 1.6790672540664673 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5017135470020788, "compression/movement_sparsity/importance_threshold": -0.0034898661809920576, "compression/movement_sparsity/linear_layer_sparsity": 0.43180612648470196, "compression/movement_sparsity/model_sparsity": 0.41697225764893053, "compression_loss": 53.80393981933594, "distillation_loss": 1.4634547233581543, "epoch": 2.62, "learning_rate": 4.0983375598760214e-05, "loss": 55.1121, "step": 3103, "task_loss": 0.6079245805740356 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5022442034569394, "compression/movement_sparsity/importance_threshold": -0.0034861496039019094, "compression/movement_sparsity/linear_layer_sparsity": 0.43256221410615675, "compression/movement_sparsity/model_sparsity": 0.41770237133459076, "compression_loss": 53.86044692993164, "distillation_loss": 0.9265747666358948, "epoch": 2.62, "learning_rate": 4.097867944021791e-05, "loss": 55.0913, "step": 3104, "task_loss": 0.9253263473510742 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5027744830251293, "compression/movement_sparsity/importance_threshold": -0.0034824356664260605, "compression/movement_sparsity/linear_layer_sparsity": 0.43337142389442934, "compression/movement_sparsity/model_sparsity": 0.4184837822772157, "compression_loss": 53.916927337646484, "distillation_loss": 1.2841525077819824, "epoch": 2.62, "learning_rate": 4.0973983281675594e-05, "loss": 55.3996, "step": 3105, "task_loss": 1.0232633352279663 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5033043858405335, "compression/movement_sparsity/importance_threshold": -0.0034787243676268147, "compression/movement_sparsity/linear_layer_sparsity": 0.434119689261915, "compression/movement_sparsity/model_sparsity": 0.41920634242739474, "compression_loss": 53.97336196899414, "distillation_loss": 1.4775420427322388, "epoch": 2.63, "learning_rate": 4.096928712313327e-05, "loss": 55.2664, "step": 3106, "task_loss": 1.2546635866165161 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5038339120370371, "compression/movement_sparsity/importance_threshold": -0.003475015706566477, "compression/movement_sparsity/linear_layer_sparsity": 0.4347484506213541, "compression/movement_sparsity/model_sparsity": 0.41981350389984085, "compression_loss": 54.029762268066406, "distillation_loss": 1.0330852270126343, "epoch": 2.63, "learning_rate": 4.0964590964590966e-05, "loss": 55.2517, "step": 3107, "task_loss": 0.8182848691940308 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5043630617485253, "compression/movement_sparsity/importance_threshold": -0.00347130968230735, "compression/movement_sparsity/linear_layer_sparsity": 0.43553159417917464, "compression/movement_sparsity/model_sparsity": 0.4205697440672189, "compression_loss": 54.08612060546875, "distillation_loss": 1.984728455543518, "epoch": 2.63, "learning_rate": 4.095989480604865e-05, "loss": 55.7509, "step": 3108, "task_loss": 1.3393394947052002 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5048918351088829, "compression/movement_sparsity/importance_threshold": -0.0034676062939117415, "compression/movement_sparsity/linear_layer_sparsity": 0.4363137003344109, "compression/movement_sparsity/model_sparsity": 0.42132498246998284, "compression_loss": 54.14242172241211, "distillation_loss": 1.0384163856506348, "epoch": 2.63, "learning_rate": 4.0955198647506346e-05, "loss": 55.4472, "step": 3109, "task_loss": 1.0590802431106567 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5054202322519957, "compression/movement_sparsity/importance_threshold": -0.0034639055404419496, "compression/movement_sparsity/linear_layer_sparsity": 0.43706835705574937, "compression/movement_sparsity/model_sparsity": 0.42205371441134776, "compression_loss": 54.19868469238281, "distillation_loss": 0.694888710975647, "epoch": 2.63, "learning_rate": 4.095050248896403e-05, "loss": 55.3893, "step": 3110, "task_loss": 0.8273056149482727 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5059482533117485, "compression/movement_sparsity/importance_threshold": -0.0034602074209602817, "compression/movement_sparsity/linear_layer_sparsity": 0.43780465055892864, "compression/movement_sparsity/model_sparsity": 0.422764713967589, "compression_loss": 54.254920959472656, "distillation_loss": 1.1678038835525513, "epoch": 2.63, "learning_rate": 4.094580633042172e-05, "loss": 55.9293, "step": 3111, "task_loss": 0.4315330386161804 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5064758984220263, "compression/movement_sparsity/importance_threshold": -0.003456511934529042, "compression/movement_sparsity/linear_layer_sparsity": 0.4384777578978055, "compression/movement_sparsity/model_sparsity": 0.42341469799865505, "compression_loss": 54.3111457824707, "distillation_loss": 1.1576933860778809, "epoch": 2.63, "learning_rate": 4.0941110171879405e-05, "loss": 55.7686, "step": 3112, "task_loss": 1.7973809242248535 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5070031677167146, "compression/movement_sparsity/importance_threshold": -0.003452819080210533, "compression/movement_sparsity/linear_layer_sparsity": 0.43915708965218825, "compression/movement_sparsity/model_sparsity": 0.4240706926174059, "compression_loss": 54.367271423339844, "distillation_loss": 1.1927461624145508, "epoch": 2.63, "learning_rate": 4.093641401333709e-05, "loss": 55.5985, "step": 3113, "task_loss": 1.561108112335205 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5075300613296982, "compression/movement_sparsity/importance_threshold": -0.003449128857067059, "compression/movement_sparsity/linear_layer_sparsity": 0.4398047031206596, "compression/movement_sparsity/model_sparsity": 0.42469605857094317, "compression_loss": 54.42340850830078, "distillation_loss": 0.9795883297920227, "epoch": 2.63, "learning_rate": 4.0931717854794784e-05, "loss": 55.5081, "step": 3114, "task_loss": 0.977310299873352 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5080565793948624, "compression/movement_sparsity/importance_threshold": -0.0034454412641609254, "compression/movement_sparsity/linear_layer_sparsity": 0.44055916905531595, "compression/movement_sparsity/model_sparsity": 0.42542460627973533, "compression_loss": 54.4794921875, "distillation_loss": 1.2904127836227417, "epoch": 2.63, "learning_rate": 4.0927021696252464e-05, "loss": 55.9005, "step": 3115, "task_loss": 1.4650390148162842 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5085827220460923, "compression/movement_sparsity/importance_threshold": -0.003441756300554434, "compression/movement_sparsity/linear_layer_sparsity": 0.44129987450052044, "compression/movement_sparsity/model_sparsity": 0.4261398662142206, "compression_loss": 54.535526275634766, "distillation_loss": 1.3306338787078857, "epoch": 2.63, "learning_rate": 4.092232553771016e-05, "loss": 55.7659, "step": 3116, "task_loss": 1.3768657445907593 }, { "compression/movement_sparsity/importance_regularization_factor": 0.509108489417273, "compression/movement_sparsity/importance_threshold": -0.003438073965309891, "compression/movement_sparsity/linear_layer_sparsity": 0.44211108754895584, "compression/movement_sparsity/model_sparsity": 0.42692321159885904, "compression_loss": 54.59151077270508, "distillation_loss": 1.842881679534912, "epoch": 2.63, "learning_rate": 4.091762937916784e-05, "loss": 55.9591, "step": 3117, "task_loss": 1.8594143390655518 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5096338816422896, "compression/movement_sparsity/importance_threshold": -0.0034343942574896, "compression/movement_sparsity/linear_layer_sparsity": 0.4428697865631229, "compression/movement_sparsity/model_sparsity": 0.4276558469678583, "compression_loss": 54.64747619628906, "distillation_loss": 1.9885168075561523, "epoch": 2.64, "learning_rate": 4.091293322062553e-05, "loss": 56.5996, "step": 3118, "task_loss": 1.7155975103378296 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5101588988550273, "compression/movement_sparsity/importance_threshold": -0.0034307171761558644, "compression/movement_sparsity/linear_layer_sparsity": 0.44356760077734125, "compression/movement_sparsity/model_sparsity": 0.42832968911709063, "compression_loss": 54.70343780517578, "distillation_loss": 1.353642463684082, "epoch": 2.64, "learning_rate": 4.0908237062083216e-05, "loss": 55.8876, "step": 3119, "task_loss": 0.4191083014011383 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5106835411893714, "compression/movement_sparsity/importance_threshold": -0.0034270427203709868, "compression/movement_sparsity/linear_layer_sparsity": 0.4444180562614663, "compression/movement_sparsity/model_sparsity": 0.42915092883902883, "compression_loss": 54.75932312011719, "distillation_loss": 1.1758902072906494, "epoch": 2.64, "learning_rate": 4.09035409035409e-05, "loss": 55.8899, "step": 3120, "task_loss": 0.31539657711982727 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5112078087792069, "compression/movement_sparsity/importance_threshold": -0.003423370889197273, "compression/movement_sparsity/linear_layer_sparsity": 0.44519647784890304, "compression/movement_sparsity/model_sparsity": 0.42990260925023227, "compression_loss": 54.81515884399414, "distillation_loss": 1.3323016166687012, "epoch": 2.64, "learning_rate": 4.0898844744998595e-05, "loss": 55.8952, "step": 3121, "task_loss": 0.7238379716873169 }, { "compression/movement_sparsity/importance_regularization_factor": 0.511731701758419, "compression/movement_sparsity/importance_threshold": -0.0034197016816970263, "compression/movement_sparsity/linear_layer_sparsity": 0.4459388884500795, "compression/movement_sparsity/model_sparsity": 0.4306195157633362, "compression_loss": 54.870967864990234, "distillation_loss": 3.1670589447021484, "epoch": 2.64, "learning_rate": 4.089414858645628e-05, "loss": 56.7952, "step": 3122, "task_loss": 0.8310158848762512 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5122552202608928, "compression/movement_sparsity/importance_threshold": -0.0034160350969325505, "compression/movement_sparsity/linear_layer_sparsity": 0.44663192107307587, "compression/movement_sparsity/model_sparsity": 0.43128874058371486, "compression_loss": 54.926700592041016, "distillation_loss": 1.9490631818771362, "epoch": 2.64, "learning_rate": 4.088945242791397e-05, "loss": 56.5619, "step": 3123, "task_loss": 1.2699609994888306 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5127783644205133, "compression/movement_sparsity/importance_threshold": -0.0034123711339661507, "compression/movement_sparsity/linear_layer_sparsity": 0.4474319731006041, "compression/movement_sparsity/model_sparsity": 0.4320613083628496, "compression_loss": 54.98245620727539, "distillation_loss": 1.7554600238800049, "epoch": 2.64, "learning_rate": 4.0884756269371654e-05, "loss": 56.4067, "step": 3124, "task_loss": 0.4941505193710327 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5133011343711658, "compression/movement_sparsity/importance_threshold": -0.0034087097918601298, "compression/movement_sparsity/linear_layer_sparsity": 0.44818618862774007, "compression/movement_sparsity/model_sparsity": 0.4327896142663901, "compression_loss": 55.03812789916992, "distillation_loss": 1.004862666130066, "epoch": 2.64, "learning_rate": 4.088006011082934e-05, "loss": 56.5542, "step": 3125, "task_loss": 0.8703616857528687 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5138235302467355, "compression/movement_sparsity/importance_threshold": -0.003405051069676791, "compression/movement_sparsity/linear_layer_sparsity": 0.44892269676593677, "compression/movement_sparsity/model_sparsity": 0.4335008210842757, "compression_loss": 55.093753814697266, "distillation_loss": 1.1207001209259033, "epoch": 2.64, "learning_rate": 4.0875363952287034e-05, "loss": 56.6376, "step": 3126, "task_loss": 0.8411110639572144 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5143455521811073, "compression/movement_sparsity/importance_threshold": -0.003401394966478441, "compression/movement_sparsity/linear_layer_sparsity": 0.44950797068600784, "compression/movement_sparsity/model_sparsity": 0.43406598904467913, "compression_loss": 55.14937973022461, "distillation_loss": 1.105970859527588, "epoch": 2.64, "learning_rate": 4.087066779374472e-05, "loss": 56.1333, "step": 3127, "task_loss": 0.5834990739822388 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5148672003081667, "compression/movement_sparsity/importance_threshold": -0.003397741481327381, "compression/movement_sparsity/linear_layer_sparsity": 0.45021637356108685, "compression/movement_sparsity/model_sparsity": 0.43475005610169704, "compression_loss": 55.204959869384766, "distillation_loss": 1.938085675239563, "epoch": 2.64, "learning_rate": 4.0865971635202406e-05, "loss": 56.8655, "step": 3128, "task_loss": 1.2625503540039062 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5153884747617983, "compression/movement_sparsity/importance_threshold": -0.003394090613285918, "compression/movement_sparsity/linear_layer_sparsity": 0.4508935470411275, "compression/movement_sparsity/model_sparsity": 0.435403966589469, "compression_loss": 55.260459899902344, "distillation_loss": 0.907630443572998, "epoch": 2.64, "learning_rate": 4.086127547666009e-05, "loss": 56.6096, "step": 3129, "task_loss": 0.9808852076530457 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5159093756758878, "compression/movement_sparsity/importance_threshold": -0.0033904423614163526, "compression/movement_sparsity/linear_layer_sparsity": 0.4515794251635423, "compression/movement_sparsity/model_sparsity": 0.436066282688371, "compression_loss": 55.315921783447266, "distillation_loss": 1.4840586185455322, "epoch": 2.65, "learning_rate": 4.0856579318117786e-05, "loss": 57.0072, "step": 3130, "task_loss": 1.2430903911590576 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5164299031843201, "compression/movement_sparsity/importance_threshold": -0.00338679672478099, "compression/movement_sparsity/linear_layer_sparsity": 0.4522318321474033, "compression/movement_sparsity/model_sparsity": 0.4366962774852977, "compression_loss": 55.371368408203125, "distillation_loss": 1.6558773517608643, "epoch": 2.65, "learning_rate": 4.085188315957547e-05, "loss": 56.7521, "step": 3131, "task_loss": 1.2319852113723755 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5169500574209804, "compression/movement_sparsity/importance_threshold": -0.0033831537024421343, "compression/movement_sparsity/linear_layer_sparsity": 0.45295492559700967, "compression/movement_sparsity/model_sparsity": 0.43739453045041443, "compression_loss": 55.426753997802734, "distillation_loss": 1.2695668935775757, "epoch": 2.65, "learning_rate": 4.084718700103315e-05, "loss": 56.9505, "step": 3132, "task_loss": 1.086560606956482 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5174698385197538, "compression/movement_sparsity/importance_threshold": -0.00337951329346209, "compression/movement_sparsity/linear_layer_sparsity": 0.4537304973083815, "compression/movement_sparsity/model_sparsity": 0.43814345888756295, "compression_loss": 55.48211669921875, "distillation_loss": 1.5455138683319092, "epoch": 2.65, "learning_rate": 4.0842490842490845e-05, "loss": 57.1748, "step": 3133, "task_loss": 1.409749984741211 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5179892466145253, "compression/movement_sparsity/importance_threshold": -0.003375875496903162, "compression/movement_sparsity/linear_layer_sparsity": 0.45439945503692103, "compression/movement_sparsity/model_sparsity": 0.4387894358601725, "compression_loss": 55.537445068359375, "distillation_loss": 1.3241931200027466, "epoch": 2.65, "learning_rate": 4.083779468394853e-05, "loss": 57.2464, "step": 3134, "task_loss": 1.2736042737960815 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5185082818391804, "compression/movement_sparsity/importance_threshold": -0.0033722403118276504, "compression/movement_sparsity/linear_layer_sparsity": 0.45503917470641747, "compression/movement_sparsity/model_sparsity": 0.4394071791910138, "compression_loss": 55.59274673461914, "distillation_loss": 1.102333426475525, "epoch": 2.65, "learning_rate": 4.0833098525406224e-05, "loss": 57.1977, "step": 3135, "task_loss": 1.0903507471084595 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5190269443276039, "compression/movement_sparsity/importance_threshold": -0.003368607737297863, "compression/movement_sparsity/linear_layer_sparsity": 0.45562721503338005, "compression/movement_sparsity/model_sparsity": 0.4399750185237216, "compression_loss": 55.64801025390625, "distillation_loss": 1.064621925354004, "epoch": 2.65, "learning_rate": 4.0828402366863904e-05, "loss": 56.9824, "step": 3136, "task_loss": 1.6531810760498047 }, { "compression/movement_sparsity/importance_regularization_factor": 0.519545234213681, "compression/movement_sparsity/importance_threshold": -0.0033649777723761026, "compression/movement_sparsity/linear_layer_sparsity": 0.4562850952101859, "compression/movement_sparsity/model_sparsity": 0.440610298492578, "compression_loss": 55.7032470703125, "distillation_loss": 1.2678351402282715, "epoch": 2.65, "learning_rate": 4.08237062083216e-05, "loss": 57.1982, "step": 3137, "task_loss": 1.4507288932800293 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5200631516312969, "compression/movement_sparsity/importance_threshold": -0.0033613504161246737, "compression/movement_sparsity/linear_layer_sparsity": 0.4568534725847169, "compression/movement_sparsity/model_sparsity": 0.4411591503557606, "compression_loss": 55.75844192504883, "distillation_loss": 1.601014256477356, "epoch": 2.65, "learning_rate": 4.081901004977928e-05, "loss": 57.0944, "step": 3138, "task_loss": 1.467469334602356 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5205806967143366, "compression/movement_sparsity/importance_threshold": -0.0033577256676058805, "compression/movement_sparsity/linear_layer_sparsity": 0.45754954587046043, "compression/movement_sparsity/model_sparsity": 0.4418313113827669, "compression_loss": 55.81358337402344, "distillation_loss": 1.1885582208633423, "epoch": 2.65, "learning_rate": 4.081431389123697e-05, "loss": 57.3256, "step": 3139, "task_loss": 1.1096551418304443 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5210978695966855, "compression/movement_sparsity/importance_threshold": -0.003354103525882025, "compression/movement_sparsity/linear_layer_sparsity": 0.4582610967257953, "compression/movement_sparsity/model_sparsity": 0.44251841827723454, "compression_loss": 55.86868667602539, "distillation_loss": 1.1433416604995728, "epoch": 2.65, "learning_rate": 4.080961773269466e-05, "loss": 57.1295, "step": 3140, "task_loss": 1.646550178527832 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5216146704122288, "compression/movement_sparsity/importance_threshold": -0.0033504839900154117, "compression/movement_sparsity/linear_layer_sparsity": 0.45896054455097973, "compression/movement_sparsity/model_sparsity": 0.4431938379178707, "compression_loss": 55.92373275756836, "distillation_loss": 1.1853364706039429, "epoch": 2.65, "learning_rate": 4.080492157415234e-05, "loss": 57.1464, "step": 3141, "task_loss": 0.7650507092475891 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5221310992948514, "compression/movement_sparsity/importance_threshold": -0.0033468670590683456, "compression/movement_sparsity/linear_layer_sparsity": 0.4596704021745103, "compression/movement_sparsity/model_sparsity": 0.4438793097482555, "compression_loss": 55.97872543334961, "distillation_loss": 0.8294848203659058, "epoch": 2.66, "learning_rate": 4.0800225415610036e-05, "loss": 57.1471, "step": 3142, "task_loss": 0.3979688882827759 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5226471563784383, "compression/movement_sparsity/importance_threshold": -0.003343252732103131, "compression/movement_sparsity/linear_layer_sparsity": 0.4604246653983168, "compression/movement_sparsity/model_sparsity": 0.4446076617099392, "compression_loss": 56.03367614746094, "distillation_loss": 1.0178886651992798, "epoch": 2.66, "learning_rate": 4.079552925706772e-05, "loss": 57.3249, "step": 3143, "task_loss": 1.4002323150634766 }, { "compression/movement_sparsity/importance_regularization_factor": 0.523162841796875, "compression/movement_sparsity/importance_threshold": -0.0033396410081820704, "compression/movement_sparsity/linear_layer_sparsity": 0.4611335333159336, "compression/movement_sparsity/model_sparsity": 0.44529217783385305, "compression_loss": 56.08857345581055, "distillation_loss": 1.1955482959747314, "epoch": 2.66, "learning_rate": 4.079083309852541e-05, "loss": 57.4477, "step": 3144, "task_loss": 0.39195290207862854 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5236781556840464, "compression/movement_sparsity/importance_threshold": -0.003336031886367469, "compression/movement_sparsity/linear_layer_sparsity": 0.4618110883693386, "compression/movement_sparsity/model_sparsity": 0.4459464567867705, "compression_loss": 56.14344787597656, "distillation_loss": 1.0837123394012451, "epoch": 2.66, "learning_rate": 4.0786136939983095e-05, "loss": 57.8087, "step": 3145, "task_loss": 1.658868432044983 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5241930981738379, "compression/movement_sparsity/importance_threshold": -0.00333242536572163, "compression/movement_sparsity/linear_layer_sparsity": 0.4625975349215943, "compression/movement_sparsity/model_sparsity": 0.4467058864805636, "compression_loss": 56.19829559326172, "distillation_loss": 1.3995057344436646, "epoch": 2.66, "learning_rate": 4.078144078144078e-05, "loss": 57.9952, "step": 3146, "task_loss": 0.6903823018074036 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5247076694001342, "compression/movement_sparsity/importance_threshold": -0.0033288214453068583, "compression/movement_sparsity/linear_layer_sparsity": 0.46321326316580735, "compression/movement_sparsity/model_sparsity": 0.4473004625653863, "compression_loss": 56.25309371948242, "distillation_loss": 1.887570858001709, "epoch": 2.66, "learning_rate": 4.0776744622898474e-05, "loss": 57.4979, "step": 3147, "task_loss": 1.6376639604568481 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5252218694968209, "compression/movement_sparsity/importance_threshold": -0.003325220124185457, "compression/movement_sparsity/linear_layer_sparsity": 0.46376330117051445, "compression/movement_sparsity/model_sparsity": 0.4478316050725169, "compression_loss": 56.307865142822266, "distillation_loss": 1.1850448846817017, "epoch": 2.66, "learning_rate": 4.077204846435616e-05, "loss": 58.0396, "step": 3148, "task_loss": 1.1419734954833984 }, { "compression/movement_sparsity/importance_regularization_factor": 0.525735698597783, "compression/movement_sparsity/importance_threshold": -0.0033216214014197293, "compression/movement_sparsity/linear_layer_sparsity": 0.46443825675537487, "compression/movement_sparsity/model_sparsity": 0.44848337385663106, "compression_loss": 56.36259078979492, "distillation_loss": 1.2688450813293457, "epoch": 2.66, "learning_rate": 4.076735230581385e-05, "loss": 57.5172, "step": 3149, "task_loss": 0.7160232663154602 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5262491568369052, "compression/movement_sparsity/importance_threshold": -0.0033180252760719835, "compression/movement_sparsity/linear_layer_sparsity": 0.46511447630200464, "compression/movement_sparsity/model_sparsity": 0.4491363631815395, "compression_loss": 56.41731262207031, "distillation_loss": 1.0588514804840088, "epoch": 2.66, "learning_rate": 4.076265614727153e-05, "loss": 57.7745, "step": 3150, "task_loss": 0.9721276760101318 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5267622443480733, "compression/movement_sparsity/importance_threshold": -0.0033144317472045184, "compression/movement_sparsity/linear_layer_sparsity": 0.46584626246981753, "compression/movement_sparsity/model_sparsity": 0.44984301024325046, "compression_loss": 56.47195053100586, "distillation_loss": 1.5750703811645508, "epoch": 2.66, "learning_rate": 4.075795998872922e-05, "loss": 57.9782, "step": 3151, "task_loss": 1.784942865371704 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5272749612651723, "compression/movement_sparsity/importance_threshold": -0.003310840813879639, "compression/movement_sparsity/linear_layer_sparsity": 0.46647248398155017, "compression/movement_sparsity/model_sparsity": 0.4504477191195723, "compression_loss": 56.526580810546875, "distillation_loss": 2.9790143966674805, "epoch": 2.66, "learning_rate": 4.075326383018691e-05, "loss": 58.8701, "step": 3152, "task_loss": 1.8907215595245361 }, { "compression/movement_sparsity/importance_regularization_factor": 0.527787307722087, "compression/movement_sparsity/importance_threshold": -0.0033072524751596504, "compression/movement_sparsity/linear_layer_sparsity": 0.46713847259234836, "compression/movement_sparsity/model_sparsity": 0.45109082897276903, "compression_loss": 56.58113479614258, "distillation_loss": 1.3961204290390015, "epoch": 2.66, "learning_rate": 4.074856767164459e-05, "loss": 57.7439, "step": 3153, "task_loss": 0.5844182968139648 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5282992838527029, "compression/movement_sparsity/importance_threshold": -0.003303666730106857, "compression/movement_sparsity/linear_layer_sparsity": 0.4679034198703566, "compression/movement_sparsity/model_sparsity": 0.4518294979585246, "compression_loss": 56.63566207885742, "distillation_loss": 1.3831321001052856, "epoch": 2.67, "learning_rate": 4.0743871513102285e-05, "loss": 58.2501, "step": 3154, "task_loss": 1.2835367918014526 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5288108897909048, "compression/movement_sparsity/importance_threshold": -0.0033000835777835624, "compression/movement_sparsity/linear_layer_sparsity": 0.4685845521740524, "compression/movement_sparsity/model_sparsity": 0.4524872312721804, "compression_loss": 56.690185546875, "distillation_loss": 0.9613630771636963, "epoch": 2.67, "learning_rate": 4.073917535455997e-05, "loss": 57.8719, "step": 3155, "task_loss": 1.0267081260681152 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5293221256705782, "compression/movement_sparsity/importance_threshold": -0.0032965030172520697, "compression/movement_sparsity/linear_layer_sparsity": 0.4692471066245715, "compression/movement_sparsity/model_sparsity": 0.45312702493906826, "compression_loss": 56.74460983276367, "distillation_loss": 1.402208924293518, "epoch": 2.67, "learning_rate": 4.073447919601766e-05, "loss": 58.3487, "step": 3156, "task_loss": 1.5169488191604614 }, { "compression/movement_sparsity/importance_regularization_factor": 0.529832991625608, "compression/movement_sparsity/importance_threshold": -0.0032929250475746837, "compression/movement_sparsity/linear_layer_sparsity": 0.46993381943872087, "compression/movement_sparsity/model_sparsity": 0.4537901470554758, "compression_loss": 56.79901123046875, "distillation_loss": 1.909407615661621, "epoch": 2.67, "learning_rate": 4.072978303747535e-05, "loss": 58.2315, "step": 3157, "task_loss": 0.8726487159729004 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5303434877898795, "compression/movement_sparsity/importance_threshold": -0.003289349667813708, "compression/movement_sparsity/linear_layer_sparsity": 0.47075058914927453, "compression/movement_sparsity/model_sparsity": 0.4545788582137945, "compression_loss": 56.853363037109375, "distillation_loss": 1.6018075942993164, "epoch": 2.67, "learning_rate": 4.072508687893303e-05, "loss": 58.5589, "step": 3158, "task_loss": 1.5681837797164917 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5308536142972777, "compression/movement_sparsity/importance_threshold": -0.0032857768770314464, "compression/movement_sparsity/linear_layer_sparsity": 0.4716123964409889, "compression/movement_sparsity/model_sparsity": 0.45541105977380913, "compression_loss": 56.9077033996582, "distillation_loss": 1.9071273803710938, "epoch": 2.67, "learning_rate": 4.0720390720390724e-05, "loss": 58.6694, "step": 3159, "task_loss": 1.5857409238815308 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5313633712816875, "compression/movement_sparsity/importance_threshold": -0.003282206674290206, "compression/movement_sparsity/linear_layer_sparsity": 0.47234378911126984, "compression/movement_sparsity/model_sparsity": 0.4561173268558389, "compression_loss": 56.96199035644531, "distillation_loss": 1.752554178237915, "epoch": 2.67, "learning_rate": 4.071569456184841e-05, "loss": 58.87, "step": 3160, "task_loss": 1.9640041589736938 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5318727588769947, "compression/movement_sparsity/importance_threshold": -0.003278639058652285, "compression/movement_sparsity/linear_layer_sparsity": 0.4729347508593032, "compression/movement_sparsity/model_sparsity": 0.45668798724981635, "compression_loss": 57.01624298095703, "distillation_loss": 1.748987078666687, "epoch": 2.67, "learning_rate": 4.07109984033061e-05, "loss": 58.5835, "step": 3161, "task_loss": 1.0655022859573364 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5323817772170839, "compression/movement_sparsity/importance_threshold": -0.003275074029179992, "compression/movement_sparsity/linear_layer_sparsity": 0.4736234073127772, "compression/movement_sparsity/model_sparsity": 0.4573529862355585, "compression_loss": 57.07045364379883, "distillation_loss": 1.2259505987167358, "epoch": 2.67, "learning_rate": 4.070630224476378e-05, "loss": 58.4556, "step": 3162, "task_loss": 0.23454053699970245 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5328904264358405, "compression/movement_sparsity/importance_threshold": -0.003271511584935629, "compression/movement_sparsity/linear_layer_sparsity": 0.47419929691291884, "compression/movement_sparsity/model_sparsity": 0.4579090922562916, "compression_loss": 57.1246337890625, "distillation_loss": 1.1683413982391357, "epoch": 2.67, "learning_rate": 4.070160608622147e-05, "loss": 58.5648, "step": 3163, "task_loss": 2.109626293182373 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5333987066671495, "compression/movement_sparsity/importance_threshold": -0.0032679517249815, "compression/movement_sparsity/linear_layer_sparsity": 0.47478903047168575, "compression/movement_sparsity/model_sparsity": 0.4584785666530822, "compression_loss": 57.178768157958984, "distillation_loss": 1.8769023418426514, "epoch": 2.67, "learning_rate": 4.069690992767916e-05, "loss": 58.7883, "step": 3164, "task_loss": 2.054685592651367 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5339066180448961, "compression/movement_sparsity/importance_threshold": -0.0032643944483799095, "compression/movement_sparsity/linear_layer_sparsity": 0.47536050812980213, "compression/movement_sparsity/model_sparsity": 0.45903041229557134, "compression_loss": 57.23289489746094, "distillation_loss": 1.4572021961212158, "epoch": 2.67, "learning_rate": 4.069221376913685e-05, "loss": 58.5199, "step": 3165, "task_loss": 1.5350674390792847 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5344141607029655, "compression/movement_sparsity/importance_threshold": -0.0032608397541931612, "compression/movement_sparsity/linear_layer_sparsity": 0.4759527696121078, "compression/movement_sparsity/model_sparsity": 0.45960232777395044, "compression_loss": 57.28696823120117, "distillation_loss": 1.1777393817901611, "epoch": 2.68, "learning_rate": 4.0687517610594535e-05, "loss": 58.5678, "step": 3166, "task_loss": 1.2994797229766846 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5349213347752428, "compression/movement_sparsity/importance_threshold": -0.0032572876414835587, "compression/movement_sparsity/linear_layer_sparsity": 0.4767330036730252, "compression/movement_sparsity/model_sparsity": 0.4603557583945946, "compression_loss": 57.340965270996094, "distillation_loss": 2.705673933029175, "epoch": 2.68, "learning_rate": 4.068282145205222e-05, "loss": 58.8753, "step": 3167, "task_loss": 2.175607919692993 }, { "compression/movement_sparsity/importance_regularization_factor": 0.535428140395613, "compression/movement_sparsity/importance_threshold": -0.0032537381093134077, "compression/movement_sparsity/linear_layer_sparsity": 0.477335674953677, "compression/movement_sparsity/model_sparsity": 0.46093772606272226, "compression_loss": 57.39493942260742, "distillation_loss": 1.0088363885879517, "epoch": 2.68, "learning_rate": 4.0678125293509914e-05, "loss": 58.8704, "step": 3168, "task_loss": 0.5511782765388489 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5359345776979615, "compression/movement_sparsity/importance_threshold": -0.00325019115674501, "compression/movement_sparsity/linear_layer_sparsity": 0.4779708395911365, "compression/movement_sparsity/model_sparsity": 0.4615510708408901, "compression_loss": 57.44886016845703, "distillation_loss": 1.4820741415023804, "epoch": 2.68, "learning_rate": 4.06734291349676e-05, "loss": 58.7629, "step": 3169, "task_loss": 1.792464017868042 }, { "compression/movement_sparsity/importance_regularization_factor": 0.536440646816173, "compression/movement_sparsity/importance_threshold": -0.0032466467828406722, "compression/movement_sparsity/linear_layer_sparsity": 0.47855704359628315, "compression/movement_sparsity/model_sparsity": 0.4621171369350855, "compression_loss": 57.502716064453125, "distillation_loss": 1.3986766338348389, "epoch": 2.68, "learning_rate": 4.066873297642528e-05, "loss": 58.7571, "step": 3170, "task_loss": 0.8458770513534546 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5369463478841332, "compression/movement_sparsity/importance_threshold": -0.0032431049866626952, "compression/movement_sparsity/linear_layer_sparsity": 0.4791641148947926, "compression/movement_sparsity/model_sparsity": 0.4627033534669214, "compression_loss": 57.556549072265625, "distillation_loss": 1.7659282684326172, "epoch": 2.68, "learning_rate": 4.066403681788297e-05, "loss": 59.0347, "step": 3171, "task_loss": 2.0756285190582275 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5374516810357268, "compression/movement_sparsity/importance_threshold": -0.0032395657672733853, "compression/movement_sparsity/linear_layer_sparsity": 0.4798046692560235, "compression/movement_sparsity/model_sparsity": 0.4633219028152683, "compression_loss": 57.61033630371094, "distillation_loss": 0.8603739142417908, "epoch": 2.68, "learning_rate": 4.065934065934066e-05, "loss": 59.0686, "step": 3172, "task_loss": 0.22714926302433014 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5379566464048393, "compression/movement_sparsity/importance_threshold": -0.0032360291237350444, "compression/movement_sparsity/linear_layer_sparsity": 0.4804801852767628, "compression/movement_sparsity/model_sparsity": 0.46397421278256484, "compression_loss": 57.6640739440918, "distillation_loss": 1.4142537117004395, "epoch": 2.68, "learning_rate": 4.065464450079835e-05, "loss": 59.238, "step": 3173, "task_loss": 0.47118258476257324 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5384612441253557, "compression/movement_sparsity/importance_threshold": -0.0032324950551099775, "compression/movement_sparsity/linear_layer_sparsity": 0.4810939698816512, "compression/movement_sparsity/model_sparsity": 0.46456691199805306, "compression_loss": 57.717811584472656, "distillation_loss": 1.3783066272735596, "epoch": 2.68, "learning_rate": 4.064994834225604e-05, "loss": 59.0028, "step": 3174, "task_loss": 1.300521969795227 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5389654743311609, "compression/movement_sparsity/importance_threshold": -0.0032289635604604897, "compression/movement_sparsity/linear_layer_sparsity": 0.48175378177361405, "compression/movement_sparsity/model_sparsity": 0.46520405732170816, "compression_loss": 57.771461486816406, "distillation_loss": 2.1710128784179688, "epoch": 2.68, "learning_rate": 4.0645252183713725e-05, "loss": 59.7202, "step": 3175, "task_loss": 1.5312451124191284 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5394693371561404, "compression/movement_sparsity/importance_threshold": -0.003225434638848883, "compression/movement_sparsity/linear_layer_sparsity": 0.4824322788362623, "compression/movement_sparsity/model_sparsity": 0.4658592459229533, "compression_loss": 57.825103759765625, "distillation_loss": 1.1456093788146973, "epoch": 2.68, "learning_rate": 4.064055602517141e-05, "loss": 59.2378, "step": 3176, "task_loss": 0.935524582862854 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5399728327341792, "compression/movement_sparsity/importance_threshold": -0.003221908289337463, "compression/movement_sparsity/linear_layer_sparsity": 0.4833285469724443, "compression/movement_sparsity/model_sparsity": 0.46672472449141417, "compression_loss": 57.878692626953125, "distillation_loss": 2.597580909729004, "epoch": 2.69, "learning_rate": 4.06358598666291e-05, "loss": 59.8918, "step": 3177, "task_loss": 1.5374902486801147 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5404759611991623, "compression/movement_sparsity/importance_threshold": -0.0032183845109885335, "compression/movement_sparsity/linear_layer_sparsity": 0.4841042140771572, "compression/movement_sparsity/model_sparsity": 0.46747374504484906, "compression_loss": 57.9322509765625, "distillation_loss": 1.9911036491394043, "epoch": 2.69, "learning_rate": 4.063116370808679e-05, "loss": 59.5648, "step": 3178, "task_loss": 1.6181129217147827 }, { "compression/movement_sparsity/importance_regularization_factor": 0.540978722684975, "compression/movement_sparsity/importance_threshold": -0.0032148633028643977, "compression/movement_sparsity/linear_layer_sparsity": 0.48483422354399236, "compression/movement_sparsity/model_sparsity": 0.46817867644072664, "compression_loss": 57.985809326171875, "distillation_loss": 1.5482800006866455, "epoch": 2.69, "learning_rate": 4.062646754954447e-05, "loss": 59.3552, "step": 3179, "task_loss": 1.6430931091308594 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5414811173255023, "compression/movement_sparsity/importance_threshold": -0.003211344664027361, "compression/movement_sparsity/linear_layer_sparsity": 0.4856544393389928, "compression/movement_sparsity/model_sparsity": 0.46897071529988993, "compression_loss": 58.03929138183594, "distillation_loss": 1.9776475429534912, "epoch": 2.69, "learning_rate": 4.0621771391002164e-05, "loss": 59.6949, "step": 3180, "task_loss": 1.523226261138916 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5419831452546295, "compression/movement_sparsity/importance_threshold": -0.0032078285935397254, "compression/movement_sparsity/linear_layer_sparsity": 0.4862957806952877, "compression/movement_sparsity/model_sparsity": 0.4695900246075993, "compression_loss": 58.0927619934082, "distillation_loss": 0.862718403339386, "epoch": 2.69, "learning_rate": 4.061707523245985e-05, "loss": 59.7823, "step": 3181, "task_loss": 0.17751680314540863 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5424848066062418, "compression/movement_sparsity/importance_threshold": -0.0032043150904637954, "compression/movement_sparsity/linear_layer_sparsity": 0.4868397612228358, "compression/movement_sparsity/model_sparsity": 0.47011531773054627, "compression_loss": 58.14616394042969, "distillation_loss": 1.832399845123291, "epoch": 2.69, "learning_rate": 4.0612379073917537e-05, "loss": 59.8514, "step": 3182, "task_loss": 1.7924641370773315 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5429861015142242, "compression/movement_sparsity/importance_threshold": -0.0032008041538618753, "compression/movement_sparsity/linear_layer_sparsity": 0.4874428975460254, "compression/movement_sparsity/model_sparsity": 0.4706977344655699, "compression_loss": 58.19955062866211, "distillation_loss": 1.6733181476593018, "epoch": 2.69, "learning_rate": 4.060768291537522e-05, "loss": 59.327, "step": 3183, "task_loss": 1.151419997215271 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5434870301124619, "compression/movement_sparsity/importance_threshold": -0.003197295782796269, "compression/movement_sparsity/linear_layer_sparsity": 0.48809196576294833, "compression/movement_sparsity/model_sparsity": 0.47132450519247415, "compression_loss": 58.252864837646484, "distillation_loss": 1.310351848602295, "epoch": 2.69, "learning_rate": 4.060298675683291e-05, "loss": 59.5851, "step": 3184, "task_loss": 1.4411141872406006 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5439875925348399, "compression/movement_sparsity/importance_threshold": -0.0031937899763292816, "compression/movement_sparsity/linear_layer_sparsity": 0.4888370354535076, "compression/movement_sparsity/model_sparsity": 0.4720439794470602, "compression_loss": 58.30614471435547, "distillation_loss": 1.3653380870819092, "epoch": 2.69, "learning_rate": 4.05982905982906e-05, "loss": 59.873, "step": 3185, "task_loss": 1.3209458589553833 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5444877889152435, "compression/movement_sparsity/importance_threshold": -0.003190286733523215, "compression/movement_sparsity/linear_layer_sparsity": 0.48947371446025684, "compression/movement_sparsity/model_sparsity": 0.47265878657127397, "compression_loss": 58.35938262939453, "distillation_loss": 1.6706267595291138, "epoch": 2.69, "learning_rate": 4.059359443974829e-05, "loss": 59.7526, "step": 3186, "task_loss": 1.3089333772659302 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5449876193875578, "compression/movement_sparsity/importance_threshold": -0.003186786053440375, "compression/movement_sparsity/linear_layer_sparsity": 0.49008451802323627, "compression/movement_sparsity/model_sparsity": 0.4732486071528135, "compression_loss": 58.412567138671875, "distillation_loss": 2.037510633468628, "epoch": 2.69, "learning_rate": 4.0588898281205975e-05, "loss": 60.0894, "step": 3187, "task_loss": 0.9613476395606995 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5454870840856679, "compression/movement_sparsity/importance_threshold": -0.0031832879351430645, "compression/movement_sparsity/linear_layer_sparsity": 0.49073495751943735, "compression/movement_sparsity/model_sparsity": 0.47387670205133414, "compression_loss": 58.46571731567383, "distillation_loss": 1.2410633563995361, "epoch": 2.69, "learning_rate": 4.058420212266366e-05, "loss": 60.0393, "step": 3188, "task_loss": 1.5163161754608154 }, { "compression/movement_sparsity/importance_regularization_factor": 0.545986183143459, "compression/movement_sparsity/importance_threshold": -0.0031797923776935876, "compression/movement_sparsity/linear_layer_sparsity": 0.49148907765323224, "compression/movement_sparsity/model_sparsity": 0.47460491583858827, "compression_loss": 58.518821716308594, "distillation_loss": 1.7192423343658447, "epoch": 2.7, "learning_rate": 4.057950596412135e-05, "loss": 60.7557, "step": 3189, "task_loss": 1.6420795917510986 }, { "compression/movement_sparsity/importance_regularization_factor": 0.546484916694816, "compression/movement_sparsity/importance_threshold": -0.0031762993801542506, "compression/movement_sparsity/linear_layer_sparsity": 0.49216837363511207, "compression/movement_sparsity/model_sparsity": 0.4752608759137317, "compression_loss": 58.57189178466797, "distillation_loss": 1.687978982925415, "epoch": 2.7, "learning_rate": 4.057480980557904e-05, "loss": 60.0822, "step": 3190, "task_loss": 1.119887113571167 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5469832848736245, "compression/movement_sparsity/importance_threshold": -0.0031728089415873537, "compression/movement_sparsity/linear_layer_sparsity": 0.49281641637361834, "compression/movement_sparsity/model_sparsity": 0.4758866563905576, "compression_loss": 58.624908447265625, "distillation_loss": 1.252699375152588, "epoch": 2.7, "learning_rate": 4.057011364703673e-05, "loss": 60.2995, "step": 3191, "task_loss": 0.2589733302593231 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5474812878137694, "compression/movement_sparsity/importance_threshold": -0.0031693210610552022, "compression/movement_sparsity/linear_layer_sparsity": 0.4934945557112375, "compression/movement_sparsity/model_sparsity": 0.4765414995557289, "compression_loss": 58.67790985107422, "distillation_loss": 1.2184535264968872, "epoch": 2.7, "learning_rate": 4.0565417488494413e-05, "loss": 60.3341, "step": 3192, "task_loss": 0.4556049406528473 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5479789256491356, "compression/movement_sparsity/importance_threshold": -0.0031658357376201014, "compression/movement_sparsity/linear_layer_sparsity": 0.49418818454261565, "compression/movement_sparsity/model_sparsity": 0.47721130010289736, "compression_loss": 58.73088073730469, "distillation_loss": 0.9668209552764893, "epoch": 2.7, "learning_rate": 4.05607213299521e-05, "loss": 60.3313, "step": 3193, "task_loss": 1.2551921606063843 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5484761985136087, "compression/movement_sparsity/importance_threshold": -0.0031623529703443537, "compression/movement_sparsity/linear_layer_sparsity": 0.49479116585162597, "compression/movement_sparsity/model_sparsity": 0.4777935671489557, "compression_loss": 58.78380584716797, "distillation_loss": 0.7016304135322571, "epoch": 2.7, "learning_rate": 4.0556025171409786e-05, "loss": 60.1835, "step": 3194, "task_loss": 0.8164786696434021 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5489731065410736, "compression/movement_sparsity/importance_threshold": -0.003158872758290264, "compression/movement_sparsity/linear_layer_sparsity": 0.4954844011854721, "compression/movement_sparsity/model_sparsity": 0.4784629877164429, "compression_loss": 58.83666229248047, "distillation_loss": 1.6767808198928833, "epoch": 2.7, "learning_rate": 4.055132901286748e-05, "loss": 60.4673, "step": 3195, "task_loss": 1.5425060987472534 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5494696498654155, "compression/movement_sparsity/importance_threshold": -0.003155395100520135, "compression/movement_sparsity/linear_layer_sparsity": 0.4963608513550433, "compression/movement_sparsity/model_sparsity": 0.4793093291264132, "compression_loss": 58.8895263671875, "distillation_loss": 1.538779854774475, "epoch": 2.7, "learning_rate": 4.054663285432516e-05, "loss": 60.2842, "step": 3196, "task_loss": 0.3826006352901459 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5499658286205193, "compression/movement_sparsity/importance_threshold": -0.0031519199960962727, "compression/movement_sparsity/linear_layer_sparsity": 0.49715571636964995, "compression/movement_sparsity/model_sparsity": 0.48007688808247734, "compression_loss": 58.94231414794922, "distillation_loss": 1.7234290838241577, "epoch": 2.7, "learning_rate": 4.054193669578285e-05, "loss": 60.6546, "step": 3197, "task_loss": 0.7311191558837891 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5504616429402704, "compression/movement_sparsity/importance_threshold": -0.00314844744408098, "compression/movement_sparsity/linear_layer_sparsity": 0.4976947006709586, "compression/movement_sparsity/model_sparsity": 0.48059735661492636, "compression_loss": 58.995086669921875, "distillation_loss": 2.7852134704589844, "epoch": 2.7, "learning_rate": 4.053724053724054e-05, "loss": 60.6698, "step": 3198, "task_loss": 1.1367361545562744 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5509570929585539, "compression/movement_sparsity/importance_threshold": -0.003144977443536561, "compression/movement_sparsity/linear_layer_sparsity": 0.4984495124064764, "compression/movement_sparsity/model_sparsity": 0.4813262382452566, "compression_loss": 59.047813415527344, "distillation_loss": 2.6569573879241943, "epoch": 2.7, "learning_rate": 4.053254437869823e-05, "loss": 61.0445, "step": 3199, "task_loss": 1.802232265472412 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5514521788092548, "compression/movement_sparsity/importance_threshold": -0.0031415099935253204, "compression/movement_sparsity/linear_layer_sparsity": 0.4989969390184712, "compression/movement_sparsity/model_sparsity": 0.4818548590690482, "compression_loss": 59.10051345825195, "distillation_loss": 1.2958006858825684, "epoch": 2.7, "learning_rate": 4.052784822015591e-05, "loss": 60.5185, "step": 3200, "task_loss": 1.6842865943908691 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5519469006262585, "compression/movement_sparsity/importance_threshold": -0.00313804509310956, "compression/movement_sparsity/linear_layer_sparsity": 0.49967661657371537, "compression/movement_sparsity/model_sparsity": 0.482511187609337, "compression_loss": 59.153106689453125, "distillation_loss": 0.7837212085723877, "epoch": 2.71, "learning_rate": 4.05231520616136e-05, "loss": 60.4069, "step": 3201, "task_loss": 1.0939966440200806 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5524412585434499, "compression/movement_sparsity/importance_threshold": -0.003134582741351586, "compression/movement_sparsity/linear_layer_sparsity": 0.5004195279899326, "compression/movement_sparsity/model_sparsity": 0.48322857773294425, "compression_loss": 59.20572280883789, "distillation_loss": 1.9697004556655884, "epoch": 2.71, "learning_rate": 4.051845590307129e-05, "loss": 61.1588, "step": 3202, "task_loss": 2.5490407943725586 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5529352526947144, "compression/movement_sparsity/importance_threshold": -0.0031311229373137003, "compression/movement_sparsity/linear_layer_sparsity": 0.5010503880028755, "compression/movement_sparsity/model_sparsity": 0.48383776576369025, "compression_loss": 59.25829315185547, "distillation_loss": 0.9899154305458069, "epoch": 2.71, "learning_rate": 4.051375974452898e-05, "loss": 60.8375, "step": 3203, "task_loss": 0.7529734373092651 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5534288832139369, "compression/movement_sparsity/importance_threshold": -0.0031276656800582094, "compression/movement_sparsity/linear_layer_sparsity": 0.5018538980390181, "compression/movement_sparsity/model_sparsity": 0.4846136727582054, "compression_loss": 59.31081008911133, "distillation_loss": 1.1852328777313232, "epoch": 2.71, "learning_rate": 4.050906358598667e-05, "loss": 60.9939, "step": 3204, "task_loss": 0.7671400308609009 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5539221502350027, "compression/movement_sparsity/importance_threshold": -0.003124210968647415, "compression/movement_sparsity/linear_layer_sparsity": 0.5024524435576679, "compression/movement_sparsity/model_sparsity": 0.48519165639694817, "compression_loss": 59.36328887939453, "distillation_loss": 1.711308479309082, "epoch": 2.71, "learning_rate": 4.050436742744435e-05, "loss": 60.7577, "step": 3205, "task_loss": 1.168859839439392 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5544150538917967, "compression/movement_sparsity/importance_threshold": -0.003120758802143623, "compression/movement_sparsity/linear_layer_sparsity": 0.5031805213093461, "compression/movement_sparsity/model_sparsity": 0.485894722438027, "compression_loss": 59.415714263916016, "distillation_loss": 1.759476900100708, "epoch": 2.71, "learning_rate": 4.049967126890204e-05, "loss": 61.2151, "step": 3206, "task_loss": 1.6526076793670654 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5549075943182042, "compression/movement_sparsity/importance_threshold": -0.003117309179609136, "compression/movement_sparsity/linear_layer_sparsity": 0.5038841545173708, "compression/movement_sparsity/model_sparsity": 0.4865741836807271, "compression_loss": 59.46808624267578, "distillation_loss": 2.3347909450531006, "epoch": 2.71, "learning_rate": 4.049497511035973e-05, "loss": 61.1834, "step": 3207, "task_loss": 1.211999535560608 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5553997716481105, "compression/movement_sparsity/importance_threshold": -0.0031138621001062586, "compression/movement_sparsity/linear_layer_sparsity": 0.504497092506357, "compression/movement_sparsity/model_sparsity": 0.48716606536417384, "compression_loss": 59.520450592041016, "distillation_loss": 1.7178672552108765, "epoch": 2.71, "learning_rate": 4.0490278951817415e-05, "loss": 61.4408, "step": 3208, "task_loss": 1.9305129051208496 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5558915860154006, "compression/movement_sparsity/importance_threshold": -0.003110417562697294, "compression/movement_sparsity/linear_layer_sparsity": 0.5050956499491744, "compression/movement_sparsity/model_sparsity": 0.4877440605174524, "compression_loss": 59.57276153564453, "distillation_loss": 1.8688201904296875, "epoch": 2.71, "learning_rate": 4.04855827932751e-05, "loss": 61.6319, "step": 3209, "task_loss": 2.906649351119995 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5563830375539593, "compression/movement_sparsity/importance_threshold": -0.003106975566444548, "compression/movement_sparsity/linear_layer_sparsity": 0.505801250644859, "compression/movement_sparsity/model_sparsity": 0.48842542165855857, "compression_loss": 59.62505340576172, "distillation_loss": 1.2433862686157227, "epoch": 2.71, "learning_rate": 4.048088663473279e-05, "loss": 60.8267, "step": 3210, "task_loss": 1.470594048500061 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5568741263976722, "compression/movement_sparsity/importance_threshold": -0.0031035361104103237, "compression/movement_sparsity/linear_layer_sparsity": 0.5065314628225439, "compression/movement_sparsity/model_sparsity": 0.48913054880154466, "compression_loss": 59.677310943603516, "distillation_loss": 2.014218807220459, "epoch": 2.71, "learning_rate": 4.047619047619048e-05, "loss": 61.7412, "step": 3211, "task_loss": 1.7117359638214111 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5573648526804245, "compression/movement_sparsity/importance_threshold": -0.0031000991936569232, "compression/movement_sparsity/linear_layer_sparsity": 0.5071971533291513, "compression/movement_sparsity/model_sparsity": 0.4897733707913465, "compression_loss": 59.729522705078125, "distillation_loss": 1.9074573516845703, "epoch": 2.71, "learning_rate": 4.047149431764817e-05, "loss": 61.5469, "step": 3212, "task_loss": 1.7530325651168823 }, { "compression/movement_sparsity/importance_regularization_factor": 0.557855216536101, "compression/movement_sparsity/importance_threshold": -0.003096664815246652, "compression/movement_sparsity/linear_layer_sparsity": 0.5078406410356207, "compression/movement_sparsity/model_sparsity": 0.49039475271549887, "compression_loss": 59.78169250488281, "distillation_loss": 2.1233768463134766, "epoch": 2.72, "learning_rate": 4.0466798159105854e-05, "loss": 61.0694, "step": 3213, "task_loss": 1.3763554096221924 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5583452180985871, "compression/movement_sparsity/importance_threshold": -0.0030932329742418153, "compression/movement_sparsity/linear_layer_sparsity": 0.5085824912009183, "compression/movement_sparsity/model_sparsity": 0.49111111804542046, "compression_loss": 59.83381652832031, "distillation_loss": 1.0325219631195068, "epoch": 2.72, "learning_rate": 4.046210200056354e-05, "loss": 61.6132, "step": 3214, "task_loss": 0.5445683598518372 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5588348575017676, "compression/movement_sparsity/importance_threshold": -0.0030898036697047156, "compression/movement_sparsity/linear_layer_sparsity": 0.509191744622105, "compression/movement_sparsity/model_sparsity": 0.49169944173730673, "compression_loss": 59.8858757019043, "distillation_loss": 2.161313056945801, "epoch": 2.72, "learning_rate": 4.0457405842021226e-05, "loss": 61.295, "step": 3215, "task_loss": 1.3501858711242676 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5593241348795281, "compression/movement_sparsity/importance_threshold": -0.0030863769006976554, "compression/movement_sparsity/linear_layer_sparsity": 0.5100961211922794, "compression/movement_sparsity/model_sparsity": 0.4925727501901079, "compression_loss": 59.93793487548828, "distillation_loss": 2.4283390045166016, "epoch": 2.72, "learning_rate": 4.045270968347892e-05, "loss": 62.1538, "step": 3216, "task_loss": 0.999991238117218 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5598130503657535, "compression/movement_sparsity/importance_threshold": -0.003082952666282942, "compression/movement_sparsity/linear_layer_sparsity": 0.5107592122303422, "compression/movement_sparsity/model_sparsity": 0.4932130620111065, "compression_loss": 59.989952087402344, "distillation_loss": 1.8886561393737793, "epoch": 2.72, "learning_rate": 4.0448013524936606e-05, "loss": 61.4217, "step": 3217, "task_loss": 1.4370546340942383 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5603016040943287, "compression/movement_sparsity/importance_threshold": -0.0030795309655228783, "compression/movement_sparsity/linear_layer_sparsity": 0.5113502574475489, "compression/movement_sparsity/model_sparsity": 0.4937838030068345, "compression_loss": 60.04193878173828, "distillation_loss": 1.7901849746704102, "epoch": 2.72, "learning_rate": 4.044331736639429e-05, "loss": 61.66, "step": 3218, "task_loss": 1.1233326196670532 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5607897961991393, "compression/movement_sparsity/importance_threshold": -0.0030761117974797674, "compression/movement_sparsity/linear_layer_sparsity": 0.5121048783963845, "compression/movement_sparsity/model_sparsity": 0.494512500404592, "compression_loss": 60.0938835144043, "distillation_loss": 1.6310561895370483, "epoch": 2.72, "learning_rate": 4.043862120785198e-05, "loss": 61.6303, "step": 3219, "task_loss": 1.484907865524292 }, { "compression/movement_sparsity/importance_regularization_factor": 0.56127762681407, "compression/movement_sparsity/importance_threshold": -0.003072695161215915, "compression/movement_sparsity/linear_layer_sparsity": 0.5128085712252474, "compression/movement_sparsity/model_sparsity": 0.495192019219971, "compression_loss": 60.145790100097656, "distillation_loss": 1.3487658500671387, "epoch": 2.72, "learning_rate": 4.0433925049309665e-05, "loss": 61.7724, "step": 3220, "task_loss": 0.9770619869232178 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5617650960730064, "compression/movement_sparsity/importance_threshold": -0.0030692810557936216, "compression/movement_sparsity/linear_layer_sparsity": 0.5134304404157929, "compression/movement_sparsity/model_sparsity": 0.4957925252907279, "compression_loss": 60.197654724121094, "distillation_loss": 2.3097586631774902, "epoch": 2.72, "learning_rate": 4.042922889076736e-05, "loss": 62.1246, "step": 3221, "task_loss": 1.3383524417877197 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5622522041098333, "compression/movement_sparsity/importance_threshold": -0.0030658694802751943, "compression/movement_sparsity/linear_layer_sparsity": 0.5140780419600965, "compression/movement_sparsity/model_sparsity": 0.49641787972972945, "compression_loss": 60.24950408935547, "distillation_loss": 1.5397720336914062, "epoch": 2.72, "learning_rate": 4.042453273222504e-05, "loss": 61.928, "step": 3222, "task_loss": 1.3492388725280762 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5627389510584361, "compression/movement_sparsity/importance_threshold": -0.0030624604337229355, "compression/movement_sparsity/linear_layer_sparsity": 0.5147180120371133, "compression/movement_sparsity/model_sparsity": 0.49703586486582246, "compression_loss": 60.301292419433594, "distillation_loss": 2.28843355178833, "epoch": 2.72, "learning_rate": 4.041983657368273e-05, "loss": 62.175, "step": 3223, "task_loss": 1.7072094678878784 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5632253370526996, "compression/movement_sparsity/importance_threshold": -0.003059053915199151, "compression/movement_sparsity/linear_layer_sparsity": 0.5153410259477519, "compression/movement_sparsity/model_sparsity": 0.4976374763320156, "compression_loss": 60.35306167602539, "distillation_loss": 2.6227030754089355, "epoch": 2.72, "learning_rate": 4.041514041514042e-05, "loss": 62.2472, "step": 3224, "task_loss": 1.4026628732681274 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5637113622265093, "compression/movement_sparsity/importance_threshold": -0.0030556499237661414, "compression/movement_sparsity/linear_layer_sparsity": 0.5159235692334344, "compression/movement_sparsity/model_sparsity": 0.4982000074637221, "compression_loss": 60.404754638671875, "distillation_loss": 2.2320141792297363, "epoch": 2.73, "learning_rate": 4.041044425659811e-05, "loss": 62.3531, "step": 3225, "task_loss": 1.5864291191101074 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5641970267137502, "compression/movement_sparsity/importance_threshold": -0.003052248458486214, "compression/movement_sparsity/linear_layer_sparsity": 0.5167145111968888, "compression/movement_sparsity/model_sparsity": 0.4989637781375098, "compression_loss": 60.45641326904297, "distillation_loss": 1.2258720397949219, "epoch": 2.73, "learning_rate": 4.040574809805579e-05, "loss": 62.1451, "step": 3226, "task_loss": 0.7619501948356628 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5646823306483073, "compression/movement_sparsity/importance_threshold": -0.003048849518421672, "compression/movement_sparsity/linear_layer_sparsity": 0.5174092609000247, "compression/movement_sparsity/model_sparsity": 0.4996346610510429, "compression_loss": 60.508087158203125, "distillation_loss": 2.245507001876831, "epoch": 2.73, "learning_rate": 4.0401051939513476e-05, "loss": 62.4336, "step": 3227, "task_loss": 1.2263633012771606 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5651672741640659, "compression/movement_sparsity/importance_threshold": -0.0030454531026348182, "compression/movement_sparsity/linear_layer_sparsity": 0.5179392901514388, "compression/movement_sparsity/model_sparsity": 0.5001464821671102, "compression_loss": 60.5596809387207, "distillation_loss": 1.8125187158584595, "epoch": 2.73, "learning_rate": 4.039635578097117e-05, "loss": 62.4666, "step": 3228, "task_loss": 1.4509414434432983 }, { "compression/movement_sparsity/importance_regularization_factor": 0.565651857394911, "compression/movement_sparsity/importance_threshold": -0.003042059210187959, "compression/movement_sparsity/linear_layer_sparsity": 0.5185626617871065, "compression/movement_sparsity/model_sparsity": 0.5007484390693773, "compression_loss": 60.611244201660156, "distillation_loss": 1.2947171926498413, "epoch": 2.73, "learning_rate": 4.0391659622428855e-05, "loss": 61.9607, "step": 3229, "task_loss": 0.3889021873474121 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5661360804747277, "compression/movement_sparsity/importance_threshold": -0.003038667840143397, "compression/movement_sparsity/linear_layer_sparsity": 0.5192293539237952, "compression/movement_sparsity/model_sparsity": 0.5013922282801858, "compression_loss": 60.662784576416016, "distillation_loss": 2.9191157817840576, "epoch": 2.73, "learning_rate": 4.038696346388654e-05, "loss": 62.8384, "step": 3230, "task_loss": 1.829105019569397 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5666199435374015, "compression/movement_sparsity/importance_threshold": -0.0030352789915634344, "compression/movement_sparsity/linear_layer_sparsity": 0.5198588665057954, "compression/movement_sparsity/model_sparsity": 0.502000115168387, "compression_loss": 60.714298248291016, "distillation_loss": 1.5689562559127808, "epoch": 2.73, "learning_rate": 4.038226730534423e-05, "loss": 62.389, "step": 3231, "task_loss": 1.5665154457092285 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5671034467168172, "compression/movement_sparsity/importance_threshold": -0.0030318926635103773, "compression/movement_sparsity/linear_layer_sparsity": 0.5204391203512917, "compression/movement_sparsity/model_sparsity": 0.5025604355092209, "compression_loss": 60.765750885009766, "distillation_loss": 2.257338523864746, "epoch": 2.73, "learning_rate": 4.037757114680192e-05, "loss": 62.7614, "step": 3232, "task_loss": 1.7667593955993652 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5675865901468602, "compression/movement_sparsity/importance_threshold": -0.0030285088550465293, "compression/movement_sparsity/linear_layer_sparsity": 0.5209478291909729, "compression/movement_sparsity/model_sparsity": 0.5030516686352876, "compression_loss": 60.81721115112305, "distillation_loss": 1.8851289749145508, "epoch": 2.73, "learning_rate": 4.037287498825961e-05, "loss": 62.406, "step": 3233, "task_loss": 1.1357941627502441 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5680693739614154, "compression/movement_sparsity/importance_threshold": -0.0030251275652341935, "compression/movement_sparsity/linear_layer_sparsity": 0.5216459653577173, "compression/movement_sparsity/model_sparsity": 0.5037258216769864, "compression_loss": 60.868629455566406, "distillation_loss": 1.6463727951049805, "epoch": 2.73, "learning_rate": 4.0368178829717294e-05, "loss": 62.7615, "step": 3234, "task_loss": 0.6807820796966553 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5685517982943682, "compression/movement_sparsity/importance_threshold": -0.0030217487931356738, "compression/movement_sparsity/linear_layer_sparsity": 0.5221901128236124, "compression/movement_sparsity/model_sparsity": 0.5042512760034346, "compression_loss": 60.91997146606445, "distillation_loss": 2.4444820880889893, "epoch": 2.73, "learning_rate": 4.036348267117498e-05, "loss": 62.8409, "step": 3235, "task_loss": 1.5648764371871948 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5690338632796035, "compression/movement_sparsity/importance_threshold": -0.0030183725378132758, "compression/movement_sparsity/linear_layer_sparsity": 0.522681877421083, "compression/movement_sparsity/model_sparsity": 0.5047261469741372, "compression_loss": 60.97130584716797, "distillation_loss": 1.3508840799331665, "epoch": 2.73, "learning_rate": 4.035878651263267e-05, "loss": 62.5742, "step": 3236, "task_loss": 0.8312810659408569 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5695155690510064, "compression/movement_sparsity/importance_threshold": -0.0030149987983293033, "compression/movement_sparsity/linear_layer_sparsity": 0.5231873309629996, "compression/movement_sparsity/model_sparsity": 0.505214236631932, "compression_loss": 61.022579193115234, "distillation_loss": 2.256608247756958, "epoch": 2.74, "learning_rate": 4.035409035409036e-05, "loss": 63.2711, "step": 3237, "task_loss": 0.9562850594520569 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5699969157424623, "compression/movement_sparsity/importance_threshold": -0.0030116275737460596, "compression/movement_sparsity/linear_layer_sparsity": 0.5237235846299196, "compression/movement_sparsity/model_sparsity": 0.5057320683356841, "compression_loss": 61.07379913330078, "distillation_loss": 1.7904212474822998, "epoch": 2.74, "learning_rate": 4.0349394195548046e-05, "loss": 63.194, "step": 3238, "task_loss": 1.2051401138305664 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5704779034878561, "compression/movement_sparsity/importance_threshold": -0.0030082588631258475, "compression/movement_sparsity/linear_layer_sparsity": 0.5243247892379522, "compression/movement_sparsity/model_sparsity": 0.506312619715909, "compression_loss": 61.12504196166992, "distillation_loss": 2.2610394954681396, "epoch": 2.74, "learning_rate": 4.034469803700573e-05, "loss": 62.9409, "step": 3239, "task_loss": 0.9777759313583374 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5709585324210729, "compression/movement_sparsity/importance_threshold": -0.0030048926655309744, "compression/movement_sparsity/linear_layer_sparsity": 0.5249596438470532, "compression/movement_sparsity/model_sparsity": 0.5069256651161461, "compression_loss": 61.17618942260742, "distillation_loss": 1.5760114192962646, "epoch": 2.74, "learning_rate": 4.034000187846342e-05, "loss": 62.9165, "step": 3240, "task_loss": 1.9501545429229736 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5714388026759982, "compression/movement_sparsity/importance_threshold": -0.00300152898002374, "compression/movement_sparsity/linear_layer_sparsity": 0.5255809049050493, "compression/movement_sparsity/model_sparsity": 0.5075255839455776, "compression_loss": 61.22731018066406, "distillation_loss": 1.9022250175476074, "epoch": 2.74, "learning_rate": 4.0335305719921105e-05, "loss": 62.6099, "step": 3241, "task_loss": 0.9030871987342834 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5719187143865168, "compression/movement_sparsity/importance_threshold": -0.0029981678056664515, "compression/movement_sparsity/linear_layer_sparsity": 0.5261873084501711, "compression/movement_sparsity/model_sparsity": 0.5081111556634089, "compression_loss": 61.27836608886719, "distillation_loss": 1.5674872398376465, "epoch": 2.74, "learning_rate": 4.03306095613788e-05, "loss": 62.6458, "step": 3242, "task_loss": 0.7744656801223755 }, { "compression/movement_sparsity/importance_regularization_factor": 0.572398267686514, "compression/movement_sparsity/importance_threshold": -0.002994809141521411, "compression/movement_sparsity/linear_layer_sparsity": 0.526917162902827, "compression/movement_sparsity/model_sparsity": 0.5088159373703212, "compression_loss": 61.329410552978516, "distillation_loss": 1.3554848432540894, "epoch": 2.74, "learning_rate": 4.032591340283648e-05, "loss": 63.06, "step": 3243, "task_loss": 1.7756683826446533 }, { "compression/movement_sparsity/importance_regularization_factor": 0.572877462709875, "compression/movement_sparsity/importance_threshold": -0.0029914529866509228, "compression/movement_sparsity/linear_layer_sparsity": 0.5275406060835005, "compression/movement_sparsity/model_sparsity": 0.5094179633598029, "compression_loss": 61.38042068481445, "distillation_loss": 1.85811448097229, "epoch": 2.74, "learning_rate": 4.032121724429417e-05, "loss": 63.3787, "step": 3244, "task_loss": 1.9419097900390625 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5733562995904848, "compression/movement_sparsity/importance_threshold": -0.0029880993401172912, "compression/movement_sparsity/linear_layer_sparsity": 0.5282233839223299, "compression/movement_sparsity/model_sparsity": 0.5100772856793984, "compression_loss": 61.43135452270508, "distillation_loss": 2.0800514221191406, "epoch": 2.74, "learning_rate": 4.031652108575186e-05, "loss": 63.5189, "step": 3245, "task_loss": 2.283797025680542 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5738347784622286, "compression/movement_sparsity/importance_threshold": -0.0029847482009828213, "compression/movement_sparsity/linear_layer_sparsity": 0.5288371923755537, "compression/movement_sparsity/model_sparsity": 0.5106700079239581, "compression_loss": 61.48225784301758, "distillation_loss": 2.069443702697754, "epoch": 2.74, "learning_rate": 4.0311824927209544e-05, "loss": 63.3955, "step": 3246, "task_loss": 1.1308705806732178 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5743128994589916, "compression/movement_sparsity/importance_threshold": -0.002981399568309815, "compression/movement_sparsity/linear_layer_sparsity": 0.5296742927919266, "compression/movement_sparsity/model_sparsity": 0.5114783513658064, "compression_loss": 61.53312301635742, "distillation_loss": 2.043513298034668, "epoch": 2.74, "learning_rate": 4.030712876866723e-05, "loss": 63.1455, "step": 3247, "task_loss": 1.7300862073898315 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5747906627146587, "compression/movement_sparsity/importance_threshold": -0.0029780534411605773, "compression/movement_sparsity/linear_layer_sparsity": 0.5302532588273182, "compression/movement_sparsity/model_sparsity": 0.5120374281367746, "compression_loss": 61.5839729309082, "distillation_loss": 1.3611375093460083, "epoch": 2.75, "learning_rate": 4.0302432610124916e-05, "loss": 63.099, "step": 3248, "task_loss": 0.7622065544128418 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5752680683631153, "compression/movement_sparsity/importance_threshold": -0.002974709818597413, "compression/movement_sparsity/linear_layer_sparsity": 0.5308463430771908, "compression/movement_sparsity/model_sparsity": 0.5126101381181235, "compression_loss": 61.634769439697266, "distillation_loss": 2.166154146194458, "epoch": 2.75, "learning_rate": 4.029773645158261e-05, "loss": 63.5365, "step": 3249, "task_loss": 1.5577365159988403 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5757451165382462, "compression/movement_sparsity/importance_threshold": -0.0029713686996826263, "compression/movement_sparsity/linear_layer_sparsity": 0.531406790880244, "compression/movement_sparsity/model_sparsity": 0.5131513328150027, "compression_loss": 61.685523986816406, "distillation_loss": 2.445378541946411, "epoch": 2.75, "learning_rate": 4.0293040293040296e-05, "loss": 63.3946, "step": 3250, "task_loss": 1.5867260694503784 }, { "compression/movement_sparsity/importance_regularization_factor": 0.576221807373937, "compression/movement_sparsity/importance_threshold": -0.0029680300834785183, "compression/movement_sparsity/linear_layer_sparsity": 0.5321133574335071, "compression/movement_sparsity/model_sparsity": 0.5138336266335082, "compression_loss": 61.73617172241211, "distillation_loss": 1.8073227405548096, "epoch": 2.75, "learning_rate": 4.028834413449798e-05, "loss": 63.4321, "step": 3251, "task_loss": 1.873195767402649 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5766981410040727, "compression/movement_sparsity/importance_threshold": -0.0029646939690473953, "compression/movement_sparsity/linear_layer_sparsity": 0.5328044106446759, "compression/movement_sparsity/model_sparsity": 0.514500940040945, "compression_loss": 61.78681564331055, "distillation_loss": 2.415198564529419, "epoch": 2.75, "learning_rate": 4.028364797595567e-05, "loss": 63.9861, "step": 3252, "task_loss": 3.182394504547119 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5771741175625382, "compression/movement_sparsity/importance_threshold": -0.002961360355451561, "compression/movement_sparsity/linear_layer_sparsity": 0.5334121377724053, "compression/movement_sparsity/model_sparsity": 0.5150877898722496, "compression_loss": 61.83740234375, "distillation_loss": 2.565702199935913, "epoch": 2.75, "learning_rate": 4.0278951817413355e-05, "loss": 63.6319, "step": 3253, "task_loss": 1.5635552406311035 }, { "compression/movement_sparsity/importance_regularization_factor": 0.577649737183219, "compression/movement_sparsity/importance_threshold": -0.002958029241753318, "compression/movement_sparsity/linear_layer_sparsity": 0.5341089622807098, "compression/movement_sparsity/model_sparsity": 0.515760676315011, "compression_loss": 61.88798141479492, "distillation_loss": 3.2892961502075195, "epoch": 2.75, "learning_rate": 4.027425565887105e-05, "loss": 64.5098, "step": 3254, "task_loss": 2.893268346786499 }, { "compression/movement_sparsity/importance_regularization_factor": 0.578125, "compression/movement_sparsity/importance_threshold": -0.0029547006270149723, "compression/movement_sparsity/linear_layer_sparsity": 0.5348495127117351, "compression/movement_sparsity/model_sparsity": 0.516475786560531, "compression_loss": 61.938507080078125, "distillation_loss": 2.1112589836120605, "epoch": 2.75, "learning_rate": 4.0269559500328734e-05, "loss": 63.6508, "step": 3255, "task_loss": 0.6777961850166321 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5785999061467664, "compression/movement_sparsity/importance_threshold": -0.002951374510298827, "compression/movement_sparsity/linear_layer_sparsity": 0.5354863825051666, "compression/movement_sparsity/model_sparsity": 0.5170907779173173, "compression_loss": 61.98895263671875, "distillation_loss": 1.5397002696990967, "epoch": 2.75, "learning_rate": 4.026486334178642e-05, "loss": 63.4923, "step": 3256, "task_loss": 1.7349225282669067 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5790744557574032, "compression/movement_sparsity/importance_threshold": -0.0029480508906671856, "compression/movement_sparsity/linear_layer_sparsity": 0.536085297673013, "compression/movement_sparsity/model_sparsity": 0.5176691185066697, "compression_loss": 62.03940963745117, "distillation_loss": 2.716867685317993, "epoch": 2.75, "learning_rate": 4.026016718324411e-05, "loss": 64.047, "step": 3257, "task_loss": 2.048130512237549 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5795486489657957, "compression/movement_sparsity/importance_threshold": -0.002944729767182354, "compression/movement_sparsity/linear_layer_sparsity": 0.5366919158531522, "compression/movement_sparsity/model_sparsity": 0.5182548974861454, "compression_loss": 62.089839935302734, "distillation_loss": 1.693877935409546, "epoch": 2.75, "learning_rate": 4.025547102470179e-05, "loss": 64.1566, "step": 3258, "task_loss": 1.4377888441085815 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5800224859058292, "compression/movement_sparsity/importance_threshold": -0.0029414111389066333, "compression/movement_sparsity/linear_layer_sparsity": 0.5373102793384128, "compression/movement_sparsity/model_sparsity": 0.5188520182833787, "compression_loss": 62.14025115966797, "distillation_loss": 2.2593026161193848, "epoch": 2.75, "learning_rate": 4.0250774866159486e-05, "loss": 64.2672, "step": 3259, "task_loss": 1.5536476373672485 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5804959667113884, "compression/movement_sparsity/importance_threshold": -0.0029380950049023302, "compression/movement_sparsity/linear_layer_sparsity": 0.537951119879667, "compression/movement_sparsity/model_sparsity": 0.5194708439805847, "compression_loss": 62.19060134887695, "distillation_loss": 3.4538440704345703, "epoch": 2.76, "learning_rate": 4.0246078707617166e-05, "loss": 64.6068, "step": 3260, "task_loss": 1.847519040107727 }, { "compression/movement_sparsity/importance_regularization_factor": 0.580969091516359, "compression/movement_sparsity/importance_threshold": -0.0029347813642317455, "compression/movement_sparsity/linear_layer_sparsity": 0.538595764230397, "compression/movement_sparsity/model_sparsity": 0.5200933428147092, "compression_loss": 62.24092102050781, "distillation_loss": 2.5441994667053223, "epoch": 2.76, "learning_rate": 4.024138254907486e-05, "loss": 64.0328, "step": 3261, "task_loss": 2.2425315380096436 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5814418604546254, "compression/movement_sparsity/importance_threshold": -0.0029314702159571873, "compression/movement_sparsity/linear_layer_sparsity": 0.5392170491367284, "compression/movement_sparsity/model_sparsity": 0.5206932846732121, "compression_loss": 62.29121398925781, "distillation_loss": 2.041661500930786, "epoch": 2.76, "learning_rate": 4.0236686390532545e-05, "loss": 64.5341, "step": 3262, "task_loss": 2.105586051940918 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5819142736600736, "compression/movement_sparsity/importance_threshold": -0.002928161559140955, "compression/movement_sparsity/linear_layer_sparsity": 0.539761208526791, "compression/movement_sparsity/model_sparsity": 0.521218750514196, "compression_loss": 62.341468811035156, "distillation_loss": 1.197457194328308, "epoch": 2.76, "learning_rate": 4.023199023199024e-05, "loss": 63.9823, "step": 3263, "task_loss": 0.9937968850135803 }, { "compression/movement_sparsity/importance_regularization_factor": 0.582386331266588, "compression/movement_sparsity/importance_threshold": -0.0029248553928453574, "compression/movement_sparsity/linear_layer_sparsity": 0.5405459499230748, "compression/movement_sparsity/model_sparsity": 0.5219765336293705, "compression_loss": 62.39168167114258, "distillation_loss": 1.215428113937378, "epoch": 2.76, "learning_rate": 4.0227294073447925e-05, "loss": 64.2882, "step": 3264, "task_loss": 1.1487324237823486 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5828580334080542, "compression/movement_sparsity/importance_threshold": -0.0029215517161326953, "compression/movement_sparsity/linear_layer_sparsity": 0.5410592495672958, "compression/movement_sparsity/model_sparsity": 0.5224721998517181, "compression_loss": 62.441856384277344, "distillation_loss": 2.4234817028045654, "epoch": 2.76, "learning_rate": 4.0222597914905604e-05, "loss": 64.4835, "step": 3265, "task_loss": 1.4215269088745117 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5833293802183572, "compression/movement_sparsity/importance_threshold": -0.0029182505280652728, "compression/movement_sparsity/linear_layer_sparsity": 0.5417021649137186, "compression/movement_sparsity/model_sparsity": 0.5230930290781524, "compression_loss": 62.491973876953125, "distillation_loss": 2.471052408218384, "epoch": 2.76, "learning_rate": 4.02179017563633e-05, "loss": 64.6163, "step": 3266, "task_loss": 1.6392581462860107 }, { "compression/movement_sparsity/importance_regularization_factor": 0.583800371831382, "compression/movement_sparsity/importance_threshold": -0.0029149518277053962, "compression/movement_sparsity/linear_layer_sparsity": 0.5423657448426544, "compression/movement_sparsity/model_sparsity": 0.5237338129951186, "compression_loss": 62.54207229614258, "distillation_loss": 1.6593048572540283, "epoch": 2.76, "learning_rate": 4.0213205597820984e-05, "loss": 64.208, "step": 3267, "task_loss": 0.7882299423217773 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5842710083810142, "compression/movement_sparsity/importance_threshold": -0.0029116556141153644, "compression/movement_sparsity/linear_layer_sparsity": 0.5430153019504504, "compression/movement_sparsity/model_sparsity": 0.5243610558179904, "compression_loss": 62.5921630859375, "distillation_loss": 2.1852846145629883, "epoch": 2.76, "learning_rate": 4.020850943927868e-05, "loss": 64.5344, "step": 3268, "task_loss": 1.5038312673568726 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5847412900011383, "compression/movement_sparsity/importance_threshold": -0.002908361886357488, "compression/movement_sparsity/linear_layer_sparsity": 0.5436665284417155, "compression/movement_sparsity/model_sparsity": 0.5249899106758734, "compression_loss": 62.64218521118164, "distillation_loss": 3.0454206466674805, "epoch": 2.76, "learning_rate": 4.0203813280736356e-05, "loss": 64.8583, "step": 3269, "task_loss": 2.502751350402832 }, { "compression/movement_sparsity/importance_regularization_factor": 0.58521121682564, "compression/movement_sparsity/importance_threshold": -0.0029050706434940654, "compression/movement_sparsity/linear_layer_sparsity": 0.5444049682950691, "compression/movement_sparsity/model_sparsity": 0.5257029828485578, "compression_loss": 62.69215393066406, "distillation_loss": 1.3584942817687988, "epoch": 2.76, "learning_rate": 4.019911712219405e-05, "loss": 64.272, "step": 3270, "task_loss": 1.1215400695800781 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5856807889884039, "compression/movement_sparsity/importance_threshold": -0.002901781884587405, "compression/movement_sparsity/linear_layer_sparsity": 0.5451045472860976, "compression/movement_sparsity/model_sparsity": 0.5263785291490877, "compression_loss": 62.742103576660156, "distillation_loss": 1.1327388286590576, "epoch": 2.76, "learning_rate": 4.0194420963651736e-05, "loss": 64.9521, "step": 3271, "task_loss": 1.3774384260177612 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5861500066233158, "compression/movement_sparsity/importance_threshold": -0.002898495608699806, "compression/movement_sparsity/linear_layer_sparsity": 0.5458082520391281, "compression/movement_sparsity/model_sparsity": 0.5270580594790025, "compression_loss": 62.791954040527344, "distillation_loss": 2.9390554428100586, "epoch": 2.77, "learning_rate": 4.018972480510942e-05, "loss": 65.0331, "step": 3272, "task_loss": 2.722241163253784 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5866188698642603, "compression/movement_sparsity/importance_threshold": -0.002895211814893578, "compression/movement_sparsity/linear_layer_sparsity": 0.5464149059917702, "compression/movement_sparsity/model_sparsity": 0.5276438730020856, "compression_loss": 62.841827392578125, "distillation_loss": 2.035230875015259, "epoch": 2.77, "learning_rate": 4.018502864656711e-05, "loss": 65.0773, "step": 3273, "task_loss": 1.7724132537841797 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5870873788451227, "compression/movement_sparsity/importance_threshold": -0.00289193050223102, "compression/movement_sparsity/linear_layer_sparsity": 0.5470722257326972, "compression/movement_sparsity/model_sparsity": 0.5282786117877596, "compression_loss": 62.891624450683594, "distillation_loss": 2.7584996223449707, "epoch": 2.77, "learning_rate": 4.0180332488024795e-05, "loss": 65.0842, "step": 3274, "task_loss": 1.3805007934570312 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5875555336997882, "compression/movement_sparsity/importance_threshold": -0.002888651669774439, "compression/movement_sparsity/linear_layer_sparsity": 0.5478709064809473, "compression/movement_sparsity/model_sparsity": 0.5290498553952779, "compression_loss": 62.9413948059082, "distillation_loss": 1.578752040863037, "epoch": 2.77, "learning_rate": 4.017563632948249e-05, "loss": 64.3718, "step": 3275, "task_loss": 1.3385707139968872 }, { "compression/movement_sparsity/importance_regularization_factor": 0.588023334562142, "compression/movement_sparsity/importance_threshold": -0.0028853753165861374, "compression/movement_sparsity/linear_layer_sparsity": 0.5485831727863403, "compression/movement_sparsity/model_sparsity": 0.5297376531618933, "compression_loss": 62.99110412597656, "distillation_loss": 2.244793176651001, "epoch": 2.77, "learning_rate": 4.0170940170940174e-05, "loss": 64.8635, "step": 3276, "task_loss": 1.7686864137649536 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5884907815660689, "compression/movement_sparsity/importance_threshold": -0.00288210144172842, "compression/movement_sparsity/linear_layer_sparsity": 0.54919685007372, "compression/movement_sparsity/model_sparsity": 0.5303302487465593, "compression_loss": 63.040802001953125, "distillation_loss": 2.023423671722412, "epoch": 2.77, "learning_rate": 4.016624401239786e-05, "loss": 65.1005, "step": 3277, "task_loss": 0.7540304064750671 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5889578748454543, "compression/movement_sparsity/importance_threshold": -0.0028788300442635925, "compression/movement_sparsity/linear_layer_sparsity": 0.5497256869083705, "compression/movement_sparsity/model_sparsity": 0.5308409184090472, "compression_loss": 63.0904426574707, "distillation_loss": 1.6696819067001343, "epoch": 2.77, "learning_rate": 4.016154785385555e-05, "loss": 64.8621, "step": 3278, "task_loss": 0.9298416376113892 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5894246145341837, "compression/movement_sparsity/importance_threshold": -0.0028755611232539526, "compression/movement_sparsity/linear_layer_sparsity": 0.5503212394609437, "compression/movement_sparsity/model_sparsity": 0.5314160118993055, "compression_loss": 63.1400260925293, "distillation_loss": 2.2118468284606934, "epoch": 2.77, "learning_rate": 4.0156851695313233e-05, "loss": 65.2393, "step": 3279, "task_loss": 1.3641406297683716 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5898910007661413, "compression/movement_sparsity/importance_threshold": -0.0028722946777618137, "compression/movement_sparsity/linear_layer_sparsity": 0.5508957935543101, "compression/movement_sparsity/model_sparsity": 0.5319708282920297, "compression_loss": 63.1895637512207, "distillation_loss": 2.0860366821289062, "epoch": 2.77, "learning_rate": 4.0152155536770927e-05, "loss": 64.7543, "step": 3280, "task_loss": 1.0176345109939575 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5903570336752134, "compression/movement_sparsity/importance_threshold": -0.0028690307068494694, "compression/movement_sparsity/linear_layer_sparsity": 0.5515128573052984, "compression/movement_sparsity/model_sparsity": 0.5325666940048613, "compression_loss": 63.23905944824219, "distillation_loss": 2.1340889930725098, "epoch": 2.77, "learning_rate": 4.014745937822861e-05, "loss": 65.3394, "step": 3281, "task_loss": 1.4301774501800537 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5908227133952841, "compression/movement_sparsity/importance_threshold": -0.002865769209579233, "compression/movement_sparsity/linear_layer_sparsity": 0.5521910085670851, "compression/movement_sparsity/model_sparsity": 0.5332215486845685, "compression_loss": 63.288475036621094, "distillation_loss": 2.6284728050231934, "epoch": 2.77, "learning_rate": 4.01427632196863e-05, "loss": 65.4703, "step": 3282, "task_loss": 2.27504563331604 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5912880400602394, "compression/movement_sparsity/importance_threshold": -0.0028625101850134017, "compression/movement_sparsity/linear_layer_sparsity": 0.5528403391156961, "compression/movement_sparsity/model_sparsity": 0.5338485727312602, "compression_loss": 63.33785629272461, "distillation_loss": 1.3243110179901123, "epoch": 2.77, "learning_rate": 4.0138067061143986e-05, "loss": 64.9801, "step": 3283, "task_loss": 1.5111138820648193 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5917530138039635, "compression/movement_sparsity/importance_threshold": -0.002859253632214286, "compression/movement_sparsity/linear_layer_sparsity": 0.5535664016830438, "compression/movement_sparsity/model_sparsity": 0.5345496928157898, "compression_loss": 63.3872184753418, "distillation_loss": 2.197932720184326, "epoch": 2.78, "learning_rate": 4.013337090260167e-05, "loss": 65.3257, "step": 3284, "task_loss": 1.1503630876541138 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5922176347603425, "compression/movement_sparsity/importance_threshold": -0.0028559995502441824, "compression/movement_sparsity/linear_layer_sparsity": 0.55432217927614, "compression/movement_sparsity/model_sparsity": 0.5352795071235193, "compression_loss": 63.436546325683594, "distillation_loss": 1.4603022336959839, "epoch": 2.78, "learning_rate": 4.0128674744059365e-05, "loss": 64.9047, "step": 3285, "task_loss": 0.7153134942054749 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5926819030632609, "compression/movement_sparsity/importance_threshold": -0.0028527479381654005, "compression/movement_sparsity/linear_layer_sparsity": 0.554878215137168, "compression/movement_sparsity/model_sparsity": 0.5358164414421546, "compression_loss": 63.485843658447266, "distillation_loss": 2.9051706790924072, "epoch": 2.78, "learning_rate": 4.0123978585517045e-05, "loss": 65.7868, "step": 3286, "task_loss": 2.030365467071533 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5931458188466039, "compression/movement_sparsity/importance_threshold": -0.0028494987950402435, "compression/movement_sparsity/linear_layer_sparsity": 0.5555114599836382, "compression/movement_sparsity/model_sparsity": 0.5364279323800595, "compression_loss": 63.53511047363281, "distillation_loss": 1.8905597925186157, "epoch": 2.78, "learning_rate": 4.011928242697474e-05, "loss": 65.2966, "step": 3287, "task_loss": 1.7784000635147095 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5936093822442572, "compression/movement_sparsity/importance_threshold": -0.002846252119931011, "compression/movement_sparsity/linear_layer_sparsity": 0.5560774167521391, "compression/movement_sparsity/model_sparsity": 0.5369744467924757, "compression_loss": 63.58433532714844, "distillation_loss": 1.6999075412750244, "epoch": 2.78, "learning_rate": 4.0114586268432424e-05, "loss": 65.319, "step": 3288, "task_loss": 0.8752533793449402 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5940725933901052, "compression/movement_sparsity/importance_threshold": -0.002843007911900013, "compression/movement_sparsity/linear_layer_sparsity": 0.556637709541013, "compression/movement_sparsity/model_sparsity": 0.5375154918003897, "compression_loss": 63.63349151611328, "distillation_loss": 1.7299723625183105, "epoch": 2.78, "learning_rate": 4.010989010989011e-05, "loss": 65.6627, "step": 3289, "task_loss": 0.3448010981082916 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5945354524180337, "compression/movement_sparsity/importance_threshold": -0.002839766170009547, "compression/movement_sparsity/linear_layer_sparsity": 0.5572899257381918, "compression/movement_sparsity/model_sparsity": 0.5381453023647437, "compression_loss": 63.682621002197266, "distillation_loss": 2.7418320178985596, "epoch": 2.78, "learning_rate": 4.01051939513478e-05, "loss": 65.4854, "step": 3290, "task_loss": 1.7145569324493408 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5949979594619271, "compression/movement_sparsity/importance_threshold": -0.002836526893321925, "compression/movement_sparsity/linear_layer_sparsity": 0.5578565502600804, "compression/movement_sparsity/model_sparsity": 0.5386924615911645, "compression_loss": 63.73171615600586, "distillation_loss": 2.606372356414795, "epoch": 2.78, "learning_rate": 4.010049779280548e-05, "loss": 65.9603, "step": 3291, "task_loss": 1.443753957748413 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5954601146556714, "compression/movement_sparsity/importance_threshold": -0.002833290080899444, "compression/movement_sparsity/linear_layer_sparsity": 0.5584307466284177, "compression/movement_sparsity/model_sparsity": 0.5392469325478148, "compression_loss": 63.780799865722656, "distillation_loss": 2.7498373985290527, "epoch": 2.78, "learning_rate": 4.0095801634263176e-05, "loss": 65.5984, "step": 3292, "task_loss": 2.266296863555908 }, { "compression/movement_sparsity/importance_regularization_factor": 0.595921918133151, "compression/movement_sparsity/importance_threshold": -0.002830055731804411, "compression/movement_sparsity/linear_layer_sparsity": 0.558974524445116, "compression/movement_sparsity/model_sparsity": 0.5397720299236533, "compression_loss": 63.829811096191406, "distillation_loss": 2.164581775665283, "epoch": 2.78, "learning_rate": 4.009110547572086e-05, "loss": 65.972, "step": 3293, "task_loss": 0.7952256202697754 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5963833700282513, "compression/movement_sparsity/importance_threshold": -0.0028268238450991313, "compression/movement_sparsity/linear_layer_sparsity": 0.5596265737039479, "compression/movement_sparsity/model_sparsity": 0.5404016792845061, "compression_loss": 63.87882995605469, "distillation_loss": 2.37937068939209, "epoch": 2.78, "learning_rate": 4.0086409317178556e-05, "loss": 66.034, "step": 3294, "task_loss": 0.9546102285385132 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5968444704748577, "compression/movement_sparsity/importance_threshold": -0.002823594419845905, "compression/movement_sparsity/linear_layer_sparsity": 0.5601694095114029, "compression/movement_sparsity/model_sparsity": 0.5409258670120167, "compression_loss": 63.92778015136719, "distillation_loss": 2.147037982940674, "epoch": 2.78, "learning_rate": 4.0081713158636235e-05, "loss": 66.2352, "step": 3295, "task_loss": 1.916335940361023 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5973052196068549, "compression/movement_sparsity/importance_threshold": -0.0028203674551070396, "compression/movement_sparsity/linear_layer_sparsity": 0.5607746564122641, "compression/movement_sparsity/model_sparsity": 0.541510321819876, "compression_loss": 63.97677230834961, "distillation_loss": 2.069258451461792, "epoch": 2.79, "learning_rate": 4.007701700009392e-05, "loss": 65.986, "step": 3296, "task_loss": 1.8564364910125732 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5977656175581283, "compression/movement_sparsity/importance_threshold": -0.002817142949944838, "compression/movement_sparsity/linear_layer_sparsity": 0.5614591990279036, "compression/movement_sparsity/model_sparsity": 0.5421713482907691, "compression_loss": 64.02571105957031, "distillation_loss": 2.293659210205078, "epoch": 2.79, "learning_rate": 4.0072320841551615e-05, "loss": 66.058, "step": 3297, "task_loss": 2.4276583194732666 }, { "compression/movement_sparsity/importance_regularization_factor": 0.598225664462563, "compression/movement_sparsity/importance_threshold": -0.0028139209034216033, "compression/movement_sparsity/linear_layer_sparsity": 0.5621149090061998, "compression/movement_sparsity/model_sparsity": 0.5428045326141109, "compression_loss": 64.07460021972656, "distillation_loss": 2.927645683288574, "epoch": 2.79, "learning_rate": 4.00676246830093e-05, "loss": 66.5742, "step": 3298, "task_loss": 2.8492398262023926 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5986853604540443, "compression/movement_sparsity/importance_threshold": -0.0028107013145996394, "compression/movement_sparsity/linear_layer_sparsity": 0.5627901030744129, "compression/movement_sparsity/model_sparsity": 0.5434565316889409, "compression_loss": 64.12342834472656, "distillation_loss": 1.5884689092636108, "epoch": 2.79, "learning_rate": 4.006292852446699e-05, "loss": 65.89, "step": 3299, "task_loss": 0.5364198684692383 }, { "compression/movement_sparsity/importance_regularization_factor": 0.599144705666457, "compression/movement_sparsity/importance_threshold": -0.002807484182541253, "compression/movement_sparsity/linear_layer_sparsity": 0.5633472955797015, "compression/movement_sparsity/model_sparsity": 0.5439945829175482, "compression_loss": 64.17223358154297, "distillation_loss": 1.506592869758606, "epoch": 2.79, "learning_rate": 4.0058232365924674e-05, "loss": 66.099, "step": 3300, "task_loss": 1.3622877597808838 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5996037002336866, "compression/movement_sparsity/importance_threshold": -0.002804269506308743, "compression/movement_sparsity/linear_layer_sparsity": 0.5639174735035456, "compression/movement_sparsity/model_sparsity": 0.5445451734756358, "compression_loss": 64.22098541259766, "distillation_loss": 1.782759428024292, "epoch": 2.79, "learning_rate": 4.005353620738237e-05, "loss": 66.5371, "step": 3301, "task_loss": 1.2197626829147339 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6000623442896178, "compression/movement_sparsity/importance_threshold": -0.0028010572849644193, "compression/movement_sparsity/linear_layer_sparsity": 0.5644473715891157, "compression/movement_sparsity/model_sparsity": 0.5450568679318094, "compression_loss": 64.26970672607422, "distillation_loss": 2.159693956375122, "epoch": 2.79, "learning_rate": 4.004884004884005e-05, "loss": 66.7084, "step": 3302, "task_loss": 1.0949251651763916 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6005206379681363, "compression/movement_sparsity/importance_threshold": -0.002797847517570581, "compression/movement_sparsity/linear_layer_sparsity": 0.5650847302734202, "compression/movement_sparsity/model_sparsity": 0.5456723313845634, "compression_loss": 64.31838989257812, "distillation_loss": 1.7270528078079224, "epoch": 2.79, "learning_rate": 4.004414389029774e-05, "loss": 66.5947, "step": 3303, "task_loss": 2.5444464683532715 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6009785814031265, "compression/movement_sparsity/importance_threshold": -0.0027946402031895365, "compression/movement_sparsity/linear_layer_sparsity": 0.5658131061292893, "compression/movement_sparsity/model_sparsity": 0.5463756852890371, "compression_loss": 64.36705017089844, "distillation_loss": 2.8622612953186035, "epoch": 2.79, "learning_rate": 4.0039447731755426e-05, "loss": 66.7382, "step": 3304, "task_loss": 1.72420334815979 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6014361747284744, "compression/movement_sparsity/importance_threshold": -0.0027914353408835853, "compression/movement_sparsity/linear_layer_sparsity": 0.5663598530637288, "compression/movement_sparsity/model_sparsity": 0.5469036497842884, "compression_loss": 64.41565704345703, "distillation_loss": 2.7856898307800293, "epoch": 2.79, "learning_rate": 4.003475157321311e-05, "loss": 66.4833, "step": 3305, "task_loss": 2.3755033016204834 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6018934180780646, "compression/movement_sparsity/importance_threshold": -0.002788232929715034, "compression/movement_sparsity/linear_layer_sparsity": 0.5669857406987676, "compression/movement_sparsity/model_sparsity": 0.5475080362536081, "compression_loss": 64.4642333984375, "distillation_loss": 2.2971198558807373, "epoch": 2.79, "learning_rate": 4.0030055414670805e-05, "loss": 66.1085, "step": 3306, "task_loss": 1.1109089851379395 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6023503115857822, "compression/movement_sparsity/importance_threshold": -0.0027850329687461872, "compression/movement_sparsity/linear_layer_sparsity": 0.5676101616611873, "compression/movement_sparsity/model_sparsity": 0.548111006435025, "compression_loss": 64.51274108886719, "distillation_loss": 3.0211799144744873, "epoch": 2.79, "learning_rate": 4.0025359256128485e-05, "loss": 66.9633, "step": 3307, "task_loss": 1.5738840103149414 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6028068553855128, "compression/movement_sparsity/importance_threshold": -0.0027818354570393455, "compression/movement_sparsity/linear_layer_sparsity": 0.5682271300188345, "compression/movement_sparsity/model_sparsity": 0.5487067800315704, "compression_loss": 64.56127166748047, "distillation_loss": 2.280118227005005, "epoch": 2.8, "learning_rate": 4.002066309758618e-05, "loss": 66.6821, "step": 3308, "task_loss": 1.9887398481369019 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6032630496111409, "compression/movement_sparsity/importance_threshold": -0.002778640393656818, "compression/movement_sparsity/linear_layer_sparsity": 0.5688846524706113, "compression/movement_sparsity/model_sparsity": 0.5493417145643529, "compression_loss": 64.60972595214844, "distillation_loss": 2.3958847522735596, "epoch": 2.8, "learning_rate": 4.0015966939043864e-05, "loss": 67.1087, "step": 3309, "task_loss": 1.8467392921447754 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6037188943965524, "compression/movement_sparsity/importance_threshold": -0.002775447777660902, "compression/movement_sparsity/linear_layer_sparsity": 0.5695057108177575, "compression/movement_sparsity/model_sparsity": 0.5499414376466758, "compression_loss": 64.65817260742188, "distillation_loss": 1.6658375263214111, "epoch": 2.8, "learning_rate": 4.001127078050155e-05, "loss": 66.5614, "step": 3310, "task_loss": 2.180555820465088 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6041743898756315, "compression/movement_sparsity/importance_threshold": -0.0027722576081139092, "compression/movement_sparsity/linear_layer_sparsity": 0.5701782457965878, "compression/movement_sparsity/model_sparsity": 0.5505908689800236, "compression_loss": 64.70655059814453, "distillation_loss": 2.74625825881958, "epoch": 2.8, "learning_rate": 4.0006574621959244e-05, "loss": 66.9444, "step": 3311, "task_loss": 2.65535831451416 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6046295361822644, "compression/movement_sparsity/importance_threshold": -0.002769069884078136, "compression/movement_sparsity/linear_layer_sparsity": 0.5707021579500048, "compression/movement_sparsity/model_sparsity": 0.5510967831392284, "compression_loss": 64.75489044189453, "distillation_loss": 1.6281743049621582, "epoch": 2.8, "learning_rate": 4.000187846341692e-05, "loss": 66.647, "step": 3312, "task_loss": 0.9411493539810181 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6050843334503353, "compression/movement_sparsity/importance_threshold": -0.0027658846046158935, "compression/movement_sparsity/linear_layer_sparsity": 0.5714760006570694, "compression/movement_sparsity/model_sparsity": 0.5518440419686867, "compression_loss": 64.80323028564453, "distillation_loss": 2.453622579574585, "epoch": 2.8, "learning_rate": 3.9997182304874616e-05, "loss": 67.3421, "step": 3313, "task_loss": 1.5507417917251587 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6055387818137299, "compression/movement_sparsity/importance_threshold": -0.00276270176878948, "compression/movement_sparsity/linear_layer_sparsity": 0.5720083551211724, "compression/movement_sparsity/model_sparsity": 0.5523581084192339, "compression_loss": 64.85147857666016, "distillation_loss": 1.681124210357666, "epoch": 2.8, "learning_rate": 3.99924861463323e-05, "loss": 66.9733, "step": 3314, "task_loss": 1.5357396602630615 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6059928814063333, "compression/movement_sparsity/importance_threshold": -0.002759521375661202, "compression/movement_sparsity/linear_layer_sparsity": 0.5726030610578434, "compression/movement_sparsity/model_sparsity": 0.552932384377451, "compression_loss": 64.89973449707031, "distillation_loss": 0.991226077079773, "epoch": 2.8, "learning_rate": 3.998778998778999e-05, "loss": 66.6648, "step": 3315, "task_loss": 0.5043244957923889 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6064466323620304, "compression/movement_sparsity/importance_threshold": -0.002756343424293363, "compression/movement_sparsity/linear_layer_sparsity": 0.5731223824067205, "compression/movement_sparsity/model_sparsity": 0.5534338654403748, "compression_loss": 64.9478988647461, "distillation_loss": 2.238922595977783, "epoch": 2.8, "learning_rate": 3.9983093829247675e-05, "loss": 67.4722, "step": 3316, "task_loss": 1.810430884361267 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6069000348147066, "compression/movement_sparsity/importance_threshold": -0.002753167913748267, "compression/movement_sparsity/linear_layer_sparsity": 0.5737382179684424, "compression/movement_sparsity/model_sparsity": 0.5540285451560196, "compression_loss": 64.99605560302734, "distillation_loss": 1.8672842979431152, "epoch": 2.8, "learning_rate": 3.997839767070536e-05, "loss": 67.0269, "step": 3317, "task_loss": 0.9731149077415466 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6073530888982466, "compression/movement_sparsity/importance_threshold": -0.0027499948430882203, "compression/movement_sparsity/linear_layer_sparsity": 0.5743560925628298, "compression/movement_sparsity/model_sparsity": 0.5546251938572854, "compression_loss": 65.044189453125, "distillation_loss": 1.3677279949188232, "epoch": 2.8, "learning_rate": 3.9973701512163055e-05, "loss": 67.1439, "step": 3318, "task_loss": 1.156369924545288 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6078057947465363, "compression/movement_sparsity/importance_threshold": -0.0027468242113755214, "compression/movement_sparsity/linear_layer_sparsity": 0.5750406947993076, "compression/movement_sparsity/model_sparsity": 0.5552862779008573, "compression_loss": 65.09223937988281, "distillation_loss": 2.0157811641693115, "epoch": 2.81, "learning_rate": 3.996900535362074e-05, "loss": 66.978, "step": 3319, "task_loss": 1.4118953943252563 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6082581524934598, "compression/movement_sparsity/importance_threshold": -0.0027436560176724812, "compression/movement_sparsity/linear_layer_sparsity": 0.5757692614418589, "compression/movement_sparsity/model_sparsity": 0.5559898160379038, "compression_loss": 65.1402816772461, "distillation_loss": 1.8784645795822144, "epoch": 2.81, "learning_rate": 3.996430919507843e-05, "loss": 67.342, "step": 3320, "task_loss": 0.907711386680603 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6087101622729034, "compression/movement_sparsity/importance_threshold": -0.002740490261041396, "compression/movement_sparsity/linear_layer_sparsity": 0.5762231071862475, "compression/movement_sparsity/model_sparsity": 0.5564280707847797, "compression_loss": 65.18828582763672, "distillation_loss": 2.22189998626709, "epoch": 2.81, "learning_rate": 3.9959613036536114e-05, "loss": 67.7007, "step": 3321, "task_loss": 0.9296095967292786 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6091618242187513, "compression/movement_sparsity/importance_threshold": -0.002737326940544577, "compression/movement_sparsity/linear_layer_sparsity": 0.5767483548464396, "compression/movement_sparsity/model_sparsity": 0.5569352745719934, "compression_loss": 65.23625183105469, "distillation_loss": 2.2014453411102295, "epoch": 2.81, "learning_rate": 3.99549168779938e-05, "loss": 67.5935, "step": 3322, "task_loss": 1.5445044040679932 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6096131384648894, "compression/movement_sparsity/importance_threshold": -0.002734166055244322, "compression/movement_sparsity/linear_layer_sparsity": 0.5772872914510777, "compression/movement_sparsity/model_sparsity": 0.5574556970462994, "compression_loss": 65.2842025756836, "distillation_loss": 2.454529285430908, "epoch": 2.81, "learning_rate": 3.995022071945149e-05, "loss": 67.1978, "step": 3323, "task_loss": 0.5902627110481262 }, { "compression/movement_sparsity/importance_regularization_factor": 0.610064105145202, "compression/movement_sparsity/importance_threshold": -0.002731007604202941, "compression/movement_sparsity/linear_layer_sparsity": 0.5778907139542906, "compression/movement_sparsity/model_sparsity": 0.5580383901301821, "compression_loss": 65.33211517333984, "distillation_loss": 2.196749687194824, "epoch": 2.81, "learning_rate": 3.994552456090917e-05, "loss": 67.3193, "step": 3324, "task_loss": 1.0639160871505737 }, { "compression/movement_sparsity/importance_regularization_factor": 0.610514724393575, "compression/movement_sparsity/importance_threshold": -0.002727851586482734, "compression/movement_sparsity/linear_layer_sparsity": 0.5783594887565593, "compression/movement_sparsity/model_sparsity": 0.5584910610758729, "compression_loss": 65.37999725341797, "distillation_loss": 1.3598170280456543, "epoch": 2.81, "learning_rate": 3.9940828402366866e-05, "loss": 67.58, "step": 3325, "task_loss": 1.3490220308303833 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6109649963438932, "compression/movement_sparsity/importance_threshold": -0.0027246980011460045, "compression/movement_sparsity/linear_layer_sparsity": 0.579070061830148, "compression/movement_sparsity/model_sparsity": 0.5591772237784053, "compression_loss": 65.42781066894531, "distillation_loss": 1.074196696281433, "epoch": 2.81, "learning_rate": 3.993613224382455e-05, "loss": 67.5323, "step": 3326, "task_loss": 0.4648706912994385 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6114149211300417, "compression/movement_sparsity/importance_threshold": -0.0027215468472550595, "compression/movement_sparsity/linear_layer_sparsity": 0.5796052184736455, "compression/movement_sparsity/model_sparsity": 0.5596939961448644, "compression_loss": 65.4756088256836, "distillation_loss": 1.6453258991241455, "epoch": 2.81, "learning_rate": 3.9931436085282246e-05, "loss": 67.6557, "step": 3327, "task_loss": 0.37027639150619507 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6118644988859057, "compression/movement_sparsity/importance_threshold": -0.002718398123872202, "compression/movement_sparsity/linear_layer_sparsity": 0.5802011645237507, "compression/movement_sparsity/model_sparsity": 0.5602694696148038, "compression_loss": 65.52339172363281, "distillation_loss": 2.628255844116211, "epoch": 2.81, "learning_rate": 3.992673992673993e-05, "loss": 67.8869, "step": 3328, "task_loss": 2.39514422416687 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6123137297453702, "compression/movement_sparsity/importance_threshold": -0.0027152518300597346, "compression/movement_sparsity/linear_layer_sparsity": 0.5808247627186034, "compression/movement_sparsity/model_sparsity": 0.560871645293251, "compression_loss": 65.57113647460938, "distillation_loss": 1.8479111194610596, "epoch": 2.81, "learning_rate": 3.992204376819761e-05, "loss": 67.3738, "step": 3329, "task_loss": 2.0053887367248535 }, { "compression/movement_sparsity/importance_regularization_factor": 0.612762613842321, "compression/movement_sparsity/importance_threshold": -0.00271210796487996, "compression/movement_sparsity/linear_layer_sparsity": 0.5813061294418955, "compression/movement_sparsity/model_sparsity": 0.5613364755887408, "compression_loss": 65.61878967285156, "distillation_loss": 2.45310115814209, "epoch": 2.81, "learning_rate": 3.9917347609655305e-05, "loss": 68.7122, "step": 3330, "task_loss": 0.6467757225036621 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6132111513106422, "compression/movement_sparsity/importance_threshold": -0.0027089665273951876, "compression/movement_sparsity/linear_layer_sparsity": 0.5819125449111849, "compression/movement_sparsity/model_sparsity": 0.561922058821108, "compression_loss": 65.66645812988281, "distillation_loss": 2.1474649906158447, "epoch": 2.82, "learning_rate": 3.991265145111299e-05, "loss": 67.877, "step": 3331, "task_loss": 1.3735401630401611 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6136593422842198, "compression/movement_sparsity/importance_threshold": -0.0027058275166677156, "compression/movement_sparsity/linear_layer_sparsity": 0.5825284639420802, "compression/movement_sparsity/model_sparsity": 0.5625168191385034, "compression_loss": 65.71409606933594, "distillation_loss": 2.8586883544921875, "epoch": 2.82, "learning_rate": 3.9907955292570684e-05, "loss": 67.9868, "step": 3332, "task_loss": 2.0969676971435547 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6141071868969384, "compression/movement_sparsity/importance_threshold": -0.002702690931759852, "compression/movement_sparsity/linear_layer_sparsity": 0.5830267152867448, "compression/movement_sparsity/model_sparsity": 0.5629979540166783, "compression_loss": 65.76168060302734, "distillation_loss": 2.353996753692627, "epoch": 2.82, "learning_rate": 3.9903259134028364e-05, "loss": 67.9128, "step": 3333, "task_loss": 2.0860259532928467 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6145546852826838, "compression/movement_sparsity/importance_threshold": -0.002699556771733897, "compression/movement_sparsity/linear_layer_sparsity": 0.5836051328104251, "compression/movement_sparsity/model_sparsity": 0.5635565011189999, "compression_loss": 65.80924987792969, "distillation_loss": 2.699641704559326, "epoch": 2.82, "learning_rate": 3.989856297548606e-05, "loss": 68.3635, "step": 3334, "task_loss": 2.0088369846343994 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6150018375753401, "compression/movement_sparsity/importance_threshold": -0.0026964250356521597, "compression/movement_sparsity/linear_layer_sparsity": 0.5841153321710608, "compression/movement_sparsity/model_sparsity": 0.5640491735620409, "compression_loss": 65.85678100585938, "distillation_loss": 1.855750322341919, "epoch": 2.82, "learning_rate": 3.989386681694374e-05, "loss": 68.0974, "step": 3335, "task_loss": 2.192744493484497 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6154486439087934, "compression/movement_sparsity/importance_threshold": -0.0026932957225769385, "compression/movement_sparsity/linear_layer_sparsity": 0.5846438112806823, "compression/movement_sparsity/model_sparsity": 0.5645594977884549, "compression_loss": 65.90428924560547, "distillation_loss": 2.667698621749878, "epoch": 2.82, "learning_rate": 3.988917065840143e-05, "loss": 68.6174, "step": 3336, "task_loss": 1.8491542339324951 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6158951044169283, "compression/movement_sparsity/importance_threshold": -0.002690168831570542, "compression/movement_sparsity/linear_layer_sparsity": 0.5851092353918453, "compression/movement_sparsity/model_sparsity": 0.5650089331495874, "compression_loss": 65.95169830322266, "distillation_loss": 2.902984380722046, "epoch": 2.82, "learning_rate": 3.9884474499859116e-05, "loss": 68.2922, "step": 3337, "task_loss": 1.2953808307647705 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6163412192336302, "compression/movement_sparsity/importance_threshold": -0.0026870443616952714, "compression/movement_sparsity/linear_layer_sparsity": 0.585718393419691, "compression/movement_sparsity/model_sparsity": 0.5655971647251874, "compression_loss": 65.99913024902344, "distillation_loss": 3.1647300720214844, "epoch": 2.82, "learning_rate": 3.98797783413168e-05, "loss": 68.3575, "step": 3338, "task_loss": 1.9030823707580566 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6167869884927843, "compression/movement_sparsity/importance_threshold": -0.00268392231201343, "compression/movement_sparsity/linear_layer_sparsity": 0.5861663486252675, "compression/movement_sparsity/model_sparsity": 0.5660297312913809, "compression_loss": 66.04651641845703, "distillation_loss": 3.1306753158569336, "epoch": 2.82, "learning_rate": 3.9875082182774495e-05, "loss": 68.1845, "step": 3339, "task_loss": 1.6388765573501587 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6172324123282753, "compression/movement_sparsity/importance_threshold": -0.002680802681587326, "compression/movement_sparsity/linear_layer_sparsity": 0.586757894657515, "compression/movement_sparsity/model_sparsity": 0.5666009558976123, "compression_loss": 66.09388732910156, "distillation_loss": 1.6145071983337402, "epoch": 2.82, "learning_rate": 3.987038602423218e-05, "loss": 67.8443, "step": 3340, "task_loss": 1.1877573728561401 }, { "compression/movement_sparsity/importance_regularization_factor": 0.617677490873989, "compression/movement_sparsity/importance_threshold": -0.0026776854694792584, "compression/movement_sparsity/linear_layer_sparsity": 0.5872950307128401, "compression/movement_sparsity/model_sparsity": 0.5671196396770132, "compression_loss": 66.14115142822266, "distillation_loss": 1.7615495920181274, "epoch": 2.82, "learning_rate": 3.986568986568987e-05, "loss": 68.2722, "step": 3341, "task_loss": 0.5915787220001221 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6181222242638098, "compression/movement_sparsity/importance_threshold": -0.002674570674751536, "compression/movement_sparsity/linear_layer_sparsity": 0.587884633105763, "compression/movement_sparsity/model_sparsity": 0.5676889874139099, "compression_loss": 66.18843078613281, "distillation_loss": 2.699761390686035, "epoch": 2.82, "learning_rate": 3.9860993707147554e-05, "loss": 68.3208, "step": 3342, "task_loss": 1.716234564781189 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6185666126316237, "compression/movement_sparsity/importance_threshold": -0.0026714582964664562, "compression/movement_sparsity/linear_layer_sparsity": 0.5884588652466032, "compression/movement_sparsity/model_sparsity": 0.5682434929141676, "compression_loss": 66.23563385009766, "distillation_loss": 2.8895297050476074, "epoch": 2.83, "learning_rate": 3.985629754860524e-05, "loss": 68.0224, "step": 3343, "task_loss": 2.2739460468292236 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6190106561113149, "compression/movement_sparsity/importance_threshold": -0.0026683483336863304, "compression/movement_sparsity/linear_layer_sparsity": 0.5890808179063222, "compression/movement_sparsity/model_sparsity": 0.5688440795866752, "compression_loss": 66.2828369140625, "distillation_loss": 1.3110913038253784, "epoch": 2.83, "learning_rate": 3.9851601390062934e-05, "loss": 68.0116, "step": 3344, "task_loss": 0.5842018723487854 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6194543548367692, "compression/movement_sparsity/importance_threshold": -0.002665240785473458, "compression/movement_sparsity/linear_layer_sparsity": 0.5897905324398411, "compression/movement_sparsity/model_sparsity": 0.5695294132426304, "compression_loss": 66.32998657226562, "distillation_loss": 1.4897422790527344, "epoch": 2.83, "learning_rate": 3.984690523152062e-05, "loss": 68.5351, "step": 3345, "task_loss": 0.924243152141571 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6198977089418716, "compression/movement_sparsity/importance_threshold": -0.0026621356508901442, "compression/movement_sparsity/linear_layer_sparsity": 0.5903972460133214, "compression/movement_sparsity/model_sparsity": 0.5701152843383924, "compression_loss": 66.3770523071289, "distillation_loss": 1.7013236284255981, "epoch": 2.83, "learning_rate": 3.9842209072978306e-05, "loss": 68.649, "step": 3346, "task_loss": 0.927203357219696 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6203407185605072, "compression/movement_sparsity/importance_threshold": -0.0026590329289986918, "compression/movement_sparsity/linear_layer_sparsity": 0.5908869835022941, "compression/movement_sparsity/model_sparsity": 0.5705881978380101, "compression_loss": 66.42414093017578, "distillation_loss": 2.592482089996338, "epoch": 2.83, "learning_rate": 3.983751291443599e-05, "loss": 68.4909, "step": 3347, "task_loss": 2.7591958045959473 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6207833838265611, "compression/movement_sparsity/importance_threshold": -0.002655932618861407, "compression/movement_sparsity/linear_layer_sparsity": 0.5915580518085051, "compression/movement_sparsity/model_sparsity": 0.5712362128834552, "compression_loss": 66.47114562988281, "distillation_loss": 3.3673043251037598, "epoch": 2.83, "learning_rate": 3.983281675589368e-05, "loss": 68.8837, "step": 3348, "task_loss": 2.150573968887329 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6212257048739183, "compression/movement_sparsity/importance_threshold": -0.0026528347195405935, "compression/movement_sparsity/linear_layer_sparsity": 0.5921689507648257, "compression/movement_sparsity/model_sparsity": 0.5718261255812811, "compression_loss": 66.51812744140625, "distillation_loss": 2.211599111557007, "epoch": 2.83, "learning_rate": 3.982812059735137e-05, "loss": 68.9843, "step": 3349, "task_loss": 1.7108513116836548 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6216676818364644, "compression/movement_sparsity/importance_threshold": -0.002649739230098552, "compression/movement_sparsity/linear_layer_sparsity": 0.5927960069682928, "compression/movement_sparsity/model_sparsity": 0.5724316404751086, "compression_loss": 66.56510162353516, "distillation_loss": 1.3534952402114868, "epoch": 2.83, "learning_rate": 3.982342443880905e-05, "loss": 69.0795, "step": 3350, "task_loss": 1.6996979713439941 }, { "compression/movement_sparsity/importance_regularization_factor": 0.622109314848084, "compression/movement_sparsity/importance_threshold": -0.002646646149597591, "compression/movement_sparsity/linear_layer_sparsity": 0.5933821990492718, "compression/movement_sparsity/model_sparsity": 0.5729976950547683, "compression_loss": 66.61201477050781, "distillation_loss": 1.6764590740203857, "epoch": 2.83, "learning_rate": 3.9818728280266745e-05, "loss": 69.0615, "step": 3351, "task_loss": 2.2385294437408447 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6225506040426627, "compression/movement_sparsity/importance_threshold": -0.002643555477100011, "compression/movement_sparsity/linear_layer_sparsity": 0.5939691662011471, "compression/movement_sparsity/model_sparsity": 0.5735644980792546, "compression_loss": 66.65890502929688, "distillation_loss": 1.744621992111206, "epoch": 2.83, "learning_rate": 3.981403212172443e-05, "loss": 68.6025, "step": 3352, "task_loss": 0.7098692655563354 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6229915495540852, "compression/movement_sparsity/importance_threshold": -0.0026404672116681184, "compression/movement_sparsity/linear_layer_sparsity": 0.594566268895513, "compression/movement_sparsity/model_sparsity": 0.5741410884591661, "compression_loss": 66.70573425292969, "distillation_loss": 3.49221134185791, "epoch": 2.83, "learning_rate": 3.980933596318212e-05, "loss": 69.4774, "step": 3353, "task_loss": 2.386349678039551 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6234321515162371, "compression/movement_sparsity/importance_threshold": -0.0026373813523642153, "compression/movement_sparsity/linear_layer_sparsity": 0.5950856856377312, "compression/movement_sparsity/model_sparsity": 0.5746426616383764, "compression_loss": 66.7525863647461, "distillation_loss": 1.6641709804534912, "epoch": 2.83, "learning_rate": 3.9804639804639804e-05, "loss": 69.1548, "step": 3354, "task_loss": 2.0368950366973877 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6238724100630031, "compression/movement_sparsity/importance_threshold": -0.0026342978982506074, "compression/movement_sparsity/linear_layer_sparsity": 0.5956902767093724, "compression/movement_sparsity/model_sparsity": 0.5752264831467669, "compression_loss": 66.79933166503906, "distillation_loss": 1.8775321245193481, "epoch": 2.84, "learning_rate": 3.979994364609749e-05, "loss": 69.1763, "step": 3355, "task_loss": 1.2580708265304565 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6243123253282686, "compression/movement_sparsity/importance_threshold": -0.0026312168483895977, "compression/movement_sparsity/linear_layer_sparsity": 0.5962858650344485, "compression/movement_sparsity/model_sparsity": 0.5758016111806327, "compression_loss": 66.8460693359375, "distillation_loss": 2.5882701873779297, "epoch": 2.84, "learning_rate": 3.979524748755518e-05, "loss": 69.5024, "step": 3356, "task_loss": 1.6682413816452026 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6247518974459187, "compression/movement_sparsity/importance_threshold": -0.00262813820184349, "compression/movement_sparsity/linear_layer_sparsity": 0.5970708687624202, "compression/movement_sparsity/model_sparsity": 0.5765596476155946, "compression_loss": 66.89278411865234, "distillation_loss": 3.0579864978790283, "epoch": 2.84, "learning_rate": 3.979055132901287e-05, "loss": 69.0322, "step": 3357, "task_loss": 1.3409351110458374 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6251911265498385, "compression/movement_sparsity/importance_threshold": -0.0026250619576745895, "compression/movement_sparsity/linear_layer_sparsity": 0.5975897012204242, "compression/movement_sparsity/model_sparsity": 0.5770606565825509, "compression_loss": 66.9394760131836, "distillation_loss": 3.6637699604034424, "epoch": 2.84, "learning_rate": 3.978585517047056e-05, "loss": 69.2853, "step": 3358, "task_loss": 2.6948177814483643 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6256300127739135, "compression/movement_sparsity/importance_threshold": -0.002621988114945197, "compression/movement_sparsity/linear_layer_sparsity": 0.5982219205884777, "compression/movement_sparsity/model_sparsity": 0.5776711572703775, "compression_loss": 66.9861068725586, "distillation_loss": 1.2880866527557373, "epoch": 2.84, "learning_rate": 3.978115901192824e-05, "loss": 69.891, "step": 3359, "task_loss": 1.3387134075164795 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6260685562520281, "compression/movement_sparsity/importance_threshold": -0.002618916672717621, "compression/movement_sparsity/linear_layer_sparsity": 0.5987688702337671, "compression/movement_sparsity/model_sparsity": 0.5781993175127373, "compression_loss": 67.03270721435547, "distillation_loss": 2.382185220718384, "epoch": 2.84, "learning_rate": 3.977646285338593e-05, "loss": 69.3353, "step": 3360, "task_loss": 1.646415114402771 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6265067571180682, "compression/movement_sparsity/importance_threshold": -0.002615847630054161, "compression/movement_sparsity/linear_layer_sparsity": 0.5992763151116789, "compression/movement_sparsity/model_sparsity": 0.5786893300980098, "compression_loss": 67.07929229736328, "distillation_loss": 2.7674007415771484, "epoch": 2.84, "learning_rate": 3.977176669484362e-05, "loss": 69.8185, "step": 3361, "task_loss": 1.3305968046188354 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6269446155059183, "compression/movement_sparsity/importance_threshold": -0.002612780986017125, "compression/movement_sparsity/linear_layer_sparsity": 0.5997412264836336, "compression/movement_sparsity/model_sparsity": 0.5791382703341031, "compression_loss": 67.12576293945312, "distillation_loss": 1.6906708478927612, "epoch": 2.84, "learning_rate": 3.976707053630131e-05, "loss": 68.8935, "step": 3362, "task_loss": 0.7392365336418152 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6273821315494641, "compression/movement_sparsity/importance_threshold": -0.002609716739668814, "compression/movement_sparsity/linear_layer_sparsity": 0.6003295768389547, "compression/movement_sparsity/model_sparsity": 0.5797064090447416, "compression_loss": 67.1722412109375, "distillation_loss": 1.6026190519332886, "epoch": 2.84, "learning_rate": 3.9762374377758994e-05, "loss": 70.0181, "step": 3363, "task_loss": 1.0470930337905884 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6278193053825902, "compression/movement_sparsity/importance_threshold": -0.0026066548900715346, "compression/movement_sparsity/linear_layer_sparsity": 0.600883895619843, "compression/movement_sparsity/model_sparsity": 0.5802416852702224, "compression_loss": 67.21868133544922, "distillation_loss": 3.251743793487549, "epoch": 2.84, "learning_rate": 3.975767821921668e-05, "loss": 69.7877, "step": 3364, "task_loss": 2.536362409591675 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6282561371391822, "compression/movement_sparsity/importance_threshold": -0.0026035954362875887, "compression/movement_sparsity/linear_layer_sparsity": 0.6015237464551835, "compression/movement_sparsity/model_sparsity": 0.5808595552609574, "compression_loss": 67.26506805419922, "distillation_loss": 2.310790777206421, "epoch": 2.84, "learning_rate": 3.9752982060674374e-05, "loss": 69.6227, "step": 3365, "task_loss": 2.104435920715332 }, { "compression/movement_sparsity/importance_regularization_factor": 0.628692626953125, "compression/movement_sparsity/importance_threshold": -0.00260053837737928, "compression/movement_sparsity/linear_layer_sparsity": 0.6020070091211297, "compression/movement_sparsity/model_sparsity": 0.5813262163676386, "compression_loss": 67.3114013671875, "distillation_loss": 2.838500499725342, "epoch": 2.84, "learning_rate": 3.974828590213206e-05, "loss": 69.7047, "step": 3366, "task_loss": 2.7425289154052734 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6291287749583039, "compression/movement_sparsity/importance_threshold": -0.0025974837124089134, "compression/movement_sparsity/linear_layer_sparsity": 0.6024330834790944, "compression/movement_sparsity/model_sparsity": 0.5817376537606491, "compression_loss": 67.35771942138672, "distillation_loss": 1.9508877992630005, "epoch": 2.85, "learning_rate": 3.974358974358974e-05, "loss": 70.2413, "step": 3367, "task_loss": 1.194808006286621 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6295645812886038, "compression/movement_sparsity/importance_threshold": -0.0025944314404387934, "compression/movement_sparsity/linear_layer_sparsity": 0.6029597739635705, "compression/movement_sparsity/model_sparsity": 0.582246250806694, "compression_loss": 67.40400695800781, "distillation_loss": 2.17785906791687, "epoch": 2.85, "learning_rate": 3.973889358504743e-05, "loss": 70.2992, "step": 3368, "task_loss": 1.6217352151870728 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6300000460779098, "compression/movement_sparsity/importance_threshold": -0.002591381560531225, "compression/movement_sparsity/linear_layer_sparsity": 0.6035609070265971, "compression/movement_sparsity/model_sparsity": 0.5828267330997041, "compression_loss": 67.45024871826172, "distillation_loss": 1.50167977809906, "epoch": 2.85, "learning_rate": 3.973419742650512e-05, "loss": 69.5914, "step": 3369, "task_loss": 1.1355386972427368 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6304351694601077, "compression/movement_sparsity/importance_threshold": -0.0025883340717485076, "compression/movement_sparsity/linear_layer_sparsity": 0.6039875775929436, "compression/movement_sparsity/model_sparsity": 0.5832387462195044, "compression_loss": 67.4964370727539, "distillation_loss": 1.582158088684082, "epoch": 2.85, "learning_rate": 3.972950126796281e-05, "loss": 69.5025, "step": 3370, "task_loss": 1.1491085290908813 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6308699515690818, "compression/movement_sparsity/importance_threshold": -0.0025852889731529513, "compression/movement_sparsity/linear_layer_sparsity": 0.6045432557289425, "compression/movement_sparsity/model_sparsity": 0.5837753351020658, "compression_loss": 67.54263305664062, "distillation_loss": 2.3507776260375977, "epoch": 2.85, "learning_rate": 3.97248051094205e-05, "loss": 69.8696, "step": 3371, "task_loss": 2.7397546768188477 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6313043925387178, "compression/movement_sparsity/importance_threshold": -0.002582246263806854, "compression/movement_sparsity/linear_layer_sparsity": 0.605103989712019, "compression/movement_sparsity/model_sparsity": 0.5843168061478041, "compression_loss": 67.5887680053711, "distillation_loss": 2.569270610809326, "epoch": 2.85, "learning_rate": 3.9720108950878185e-05, "loss": 69.8661, "step": 3372, "task_loss": 1.4933189153671265 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6317384925029005, "compression/movement_sparsity/importance_threshold": -0.0025792059427725243, "compression/movement_sparsity/linear_layer_sparsity": 0.6056836115766306, "compression/movement_sparsity/model_sparsity": 0.584876516218241, "compression_loss": 67.63484954833984, "distillation_loss": 2.101522207260132, "epoch": 2.85, "learning_rate": 3.971541279233587e-05, "loss": 70.0835, "step": 3373, "task_loss": 1.3961799144744873 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6321722515955155, "compression/movement_sparsity/importance_threshold": -0.0025761680091122624, "compression/movement_sparsity/linear_layer_sparsity": 0.6061813621062544, "compression/movement_sparsity/model_sparsity": 0.5853571674859125, "compression_loss": 67.6809310913086, "distillation_loss": 1.8312392234802246, "epoch": 2.85, "learning_rate": 3.971071663379356e-05, "loss": 70.3236, "step": 3374, "task_loss": 1.2809613943099976 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6326056699504472, "compression/movement_sparsity/importance_threshold": -0.002573132461888378, "compression/movement_sparsity/linear_layer_sparsity": 0.6067118802485416, "compression/movement_sparsity/model_sparsity": 0.5858694606979473, "compression_loss": 67.72693634033203, "distillation_loss": 4.660986423492432, "epoch": 2.85, "learning_rate": 3.970602047525125e-05, "loss": 70.2048, "step": 3375, "task_loss": 2.9179062843322754 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6330387477015813, "compression/movement_sparsity/importance_threshold": -0.0025700993001631697, "compression/movement_sparsity/linear_layer_sparsity": 0.6072962240835371, "compression/movement_sparsity/model_sparsity": 0.5864337305245588, "compression_loss": 67.77293395996094, "distillation_loss": 1.908196210861206, "epoch": 2.85, "learning_rate": 3.970132431670893e-05, "loss": 70.0975, "step": 3376, "task_loss": 1.3825643062591553 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6334714849828029, "compression/movement_sparsity/importance_threshold": -0.0025670685229989424, "compression/movement_sparsity/linear_layer_sparsity": 0.6078894156509184, "compression/movement_sparsity/model_sparsity": 0.5870065441367298, "compression_loss": 67.81888580322266, "distillation_loss": 2.9968273639678955, "epoch": 2.85, "learning_rate": 3.9696628158166623e-05, "loss": 70.1975, "step": 3377, "task_loss": 1.7829724550247192 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6339038819279967, "compression/movement_sparsity/importance_threshold": -0.0025640401294580034, "compression/movement_sparsity/linear_layer_sparsity": 0.6082964232648332, "compression/movement_sparsity/model_sparsity": 0.5873995697870049, "compression_loss": 67.86475372314453, "distillation_loss": 2.1071643829345703, "epoch": 2.85, "learning_rate": 3.969193199962431e-05, "loss": 69.706, "step": 3378, "task_loss": 0.9504131078720093 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6343359386710488, "compression/movement_sparsity/importance_threshold": -0.0025610141186026515, "compression/movement_sparsity/linear_layer_sparsity": 0.608742828328617, "compression/movement_sparsity/model_sparsity": 0.5878306394635452, "compression_loss": 67.91063690185547, "distillation_loss": 1.7914707660675049, "epoch": 2.86, "learning_rate": 3.9687235841081996e-05, "loss": 69.7904, "step": 3379, "task_loss": 1.778961181640625 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6347676553458432, "compression/movement_sparsity/importance_threshold": -0.002557990489495195, "compression/movement_sparsity/linear_layer_sparsity": 0.6093360675926689, "compression/movement_sparsity/model_sparsity": 0.5884034991338594, "compression_loss": 67.95645141601562, "distillation_loss": 1.8042007684707642, "epoch": 2.86, "learning_rate": 3.968253968253968e-05, "loss": 70.1543, "step": 3380, "task_loss": 1.5165016651153564 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6351990320862659, "compression/movement_sparsity/importance_threshold": -0.0025549692411979347, "compression/movement_sparsity/linear_layer_sparsity": 0.6098753738465037, "compression/movement_sparsity/model_sparsity": 0.5889242785587748, "compression_loss": 68.0022201538086, "distillation_loss": 2.283940315246582, "epoch": 2.86, "learning_rate": 3.967784352399737e-05, "loss": 70.0865, "step": 3381, "task_loss": 2.0026798248291016 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6356300690262014, "compression/movement_sparsity/importance_threshold": -0.0025519503727731785, "compression/movement_sparsity/linear_layer_sparsity": 0.6104497490773556, "compression/movement_sparsity/model_sparsity": 0.5894789222334621, "compression_loss": 68.04796600341797, "distillation_loss": 2.2981314659118652, "epoch": 2.86, "learning_rate": 3.967314736545506e-05, "loss": 70.383, "step": 3382, "task_loss": 1.2954233884811401 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6360607662995356, "compression/movement_sparsity/importance_threshold": -0.002548933883283226, "compression/movement_sparsity/linear_layer_sparsity": 0.6110890394768171, "compression/movement_sparsity/model_sparsity": 0.5900962510410148, "compression_loss": 68.09371185302734, "distillation_loss": 1.399935007095337, "epoch": 2.86, "learning_rate": 3.966845120691275e-05, "loss": 70.306, "step": 3383, "task_loss": 0.6511766910552979 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6364911240401527, "compression/movement_sparsity/importance_threshold": -0.002545919771790385, "compression/movement_sparsity/linear_layer_sparsity": 0.6116268313613622, "compression/movement_sparsity/model_sparsity": 0.5906155681198844, "compression_loss": 68.13936614990234, "distillation_loss": 3.2246179580688477, "epoch": 2.86, "learning_rate": 3.9663755048370435e-05, "loss": 70.5414, "step": 3384, "task_loss": 2.5929291248321533 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6369211423819386, "compression/movement_sparsity/importance_threshold": -0.002542908037356956, "compression/movement_sparsity/linear_layer_sparsity": 0.612124844222674, "compression/movement_sparsity/model_sparsity": 0.5910964727073433, "compression_loss": 68.18500518798828, "distillation_loss": 2.1962943077087402, "epoch": 2.86, "learning_rate": 3.965905888982812e-05, "loss": 70.3342, "step": 3385, "task_loss": 0.9510858654975891 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6373508214587782, "compression/movement_sparsity/importance_threshold": -0.002539898679045245, "compression/movement_sparsity/linear_layer_sparsity": 0.6127913932693511, "compression/movement_sparsity/model_sparsity": 0.5917401237437224, "compression_loss": 68.23057556152344, "distillation_loss": 2.1230571269989014, "epoch": 2.86, "learning_rate": 3.965436273128581e-05, "loss": 70.2589, "step": 3386, "task_loss": 1.4040042161941528 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6377801614045565, "compression/movement_sparsity/importance_threshold": -0.0025368916959175557, "compression/movement_sparsity/linear_layer_sparsity": 0.6133472741161998, "compression/movement_sparsity/model_sparsity": 0.5922769083733923, "compression_loss": 68.276123046875, "distillation_loss": 2.478807210922241, "epoch": 2.86, "learning_rate": 3.96496665727435e-05, "loss": 71.107, "step": 3387, "task_loss": 2.228999376296997 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6382091623531588, "compression/movement_sparsity/importance_threshold": -0.002533887087036192, "compression/movement_sparsity/linear_layer_sparsity": 0.6138153930892485, "compression/movement_sparsity/model_sparsity": 0.5927289460196143, "compression_loss": 68.32160186767578, "distillation_loss": 1.7568552494049072, "epoch": 2.86, "learning_rate": 3.964497041420119e-05, "loss": 70.6454, "step": 3388, "task_loss": 1.4515525102615356 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6386378244384701, "compression/movement_sparsity/importance_threshold": -0.0025308848514634594, "compression/movement_sparsity/linear_layer_sparsity": 0.6143859764347922, "compression/movement_sparsity/model_sparsity": 0.593279928071919, "compression_loss": 68.36710357666016, "distillation_loss": 2.3434557914733887, "epoch": 2.86, "learning_rate": 3.964027425565887e-05, "loss": 71.0432, "step": 3389, "task_loss": 1.457148551940918 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6390661477943758, "compression/movement_sparsity/importance_threshold": -0.0025278849882616585, "compression/movement_sparsity/linear_layer_sparsity": 0.6148380931748736, "compression/movement_sparsity/model_sparsity": 0.5937165132111047, "compression_loss": 68.4125747680664, "distillation_loss": 2.058634042739868, "epoch": 2.87, "learning_rate": 3.963557809711656e-05, "loss": 70.675, "step": 3390, "task_loss": 1.6738131046295166 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6394941325547607, "compression/movement_sparsity/importance_threshold": -0.0025248874964930966, "compression/movement_sparsity/linear_layer_sparsity": 0.6154294484204389, "compression/movement_sparsity/model_sparsity": 0.5942875535847635, "compression_loss": 68.45799255371094, "distillation_loss": 3.2280449867248535, "epoch": 2.87, "learning_rate": 3.9630881938574246e-05, "loss": 70.7327, "step": 3391, "task_loss": 1.5819525718688965 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6399217788535103, "compression/movement_sparsity/importance_threshold": -0.002521892375220074, "compression/movement_sparsity/linear_layer_sparsity": 0.6160955443487458, "compression/movement_sparsity/model_sparsity": 0.5949307670687822, "compression_loss": 68.50337219238281, "distillation_loss": 2.562039375305176, "epoch": 2.87, "learning_rate": 3.962618578003194e-05, "loss": 71.2947, "step": 3392, "task_loss": 1.8190886974334717 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6403490868245094, "compression/movement_sparsity/importance_threshold": -0.0025188996235048994, "compression/movement_sparsity/linear_layer_sparsity": 0.6165976114270538, "compression/movement_sparsity/model_sparsity": 0.5954155865984114, "compression_loss": 68.5487060546875, "distillation_loss": 1.914147138595581, "epoch": 2.87, "learning_rate": 3.962148962148962e-05, "loss": 71.1094, "step": 3393, "task_loss": 0.9982047080993652 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6407760566016435, "compression/movement_sparsity/importance_threshold": -0.002515909240409872, "compression/movement_sparsity/linear_layer_sparsity": 0.6171095755644996, "compression/movement_sparsity/model_sparsity": 0.59590996319275, "compression_loss": 68.59396362304688, "distillation_loss": 0.9200165271759033, "epoch": 2.87, "learning_rate": 3.961679346294731e-05, "loss": 70.4365, "step": 3394, "task_loss": 0.3802624046802521 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6412026883187972, "compression/movement_sparsity/importance_threshold": -0.0025129212249973007, "compression/movement_sparsity/linear_layer_sparsity": 0.617850090223022, "compression/movement_sparsity/model_sparsity": 0.5966250388946626, "compression_loss": 68.63921356201172, "distillation_loss": 1.90958833694458, "epoch": 2.87, "learning_rate": 3.9612097304405e-05, "loss": 70.8727, "step": 3395, "task_loss": 0.8666361570358276 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6416289821098562, "compression/movement_sparsity/importance_threshold": -0.0025099355763294855, "compression/movement_sparsity/linear_layer_sparsity": 0.618466736628143, "compression/movement_sparsity/model_sparsity": 0.5972205015987415, "compression_loss": 68.68440246582031, "distillation_loss": 2.463286876678467, "epoch": 2.87, "learning_rate": 3.960740114586269e-05, "loss": 71.6596, "step": 3396, "task_loss": 1.5694712400436401 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6420549381087054, "compression/movement_sparsity/importance_threshold": -0.0025069522934687317, "compression/movement_sparsity/linear_layer_sparsity": 0.6189950368752499, "compression/movement_sparsity/model_sparsity": 0.5977306531071186, "compression_loss": 68.72952270507812, "distillation_loss": 3.5738658905029297, "epoch": 2.87, "learning_rate": 3.960270498732037e-05, "loss": 71.1766, "step": 3397, "task_loss": 1.5617579221725464 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6424805564492297, "compression/movement_sparsity/importance_threshold": -0.002503971375477345, "compression/movement_sparsity/linear_layer_sparsity": 0.6195369425976294, "compression/movement_sparsity/model_sparsity": 0.5982539427008373, "compression_loss": 68.77462005615234, "distillation_loss": 2.5881693363189697, "epoch": 2.87, "learning_rate": 3.9598008828778064e-05, "loss": 71.4496, "step": 3398, "task_loss": 1.954263687133789 }, { "compression/movement_sparsity/importance_regularization_factor": 0.642905837265315, "compression/movement_sparsity/importance_threshold": -0.0025009928214176245, "compression/movement_sparsity/linear_layer_sparsity": 0.6200187385909564, "compression/movement_sparsity/model_sparsity": 0.5987191875196157, "compression_loss": 68.81973266601562, "distillation_loss": 2.7040669918060303, "epoch": 2.87, "learning_rate": 3.959331267023575e-05, "loss": 71.0068, "step": 3399, "task_loss": 1.8171714544296265 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6433307806908455, "compression/movement_sparsity/importance_threshold": -0.0024980166303518805, "compression/movement_sparsity/linear_layer_sparsity": 0.6204427262195847, "compression/movement_sparsity/model_sparsity": 0.5991286098688622, "compression_loss": 68.86479187011719, "distillation_loss": 1.901652455329895, "epoch": 2.87, "learning_rate": 3.9588616511693436e-05, "loss": 71.2142, "step": 3400, "task_loss": 1.7284334897994995 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6437553868597072, "compression/movement_sparsity/importance_threshold": -0.0024950428013424102, "compression/movement_sparsity/linear_layer_sparsity": 0.6209155910113525, "compression/movement_sparsity/model_sparsity": 0.5995852303003305, "compression_loss": 68.90982055664062, "distillation_loss": 2.322361469268799, "epoch": 2.87, "learning_rate": 3.958392035315112e-05, "loss": 70.776, "step": 3401, "task_loss": 1.1826398372650146 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6441796559057844, "compression/movement_sparsity/importance_threshold": -0.0024920713334515244, "compression/movement_sparsity/linear_layer_sparsity": 0.6213386604790729, "compression/movement_sparsity/model_sparsity": 0.5999937660303208, "compression_loss": 68.9548110961914, "distillation_loss": 2.1850244998931885, "epoch": 2.88, "learning_rate": 3.957922419460881e-05, "loss": 71.2168, "step": 3402, "task_loss": 1.630164384841919 }, { "compression/movement_sparsity/importance_regularization_factor": 0.644603587962963, "compression/movement_sparsity/importance_threshold": -0.002489102225741522, "compression/movement_sparsity/linear_layer_sparsity": 0.6217883566131243, "compression/movement_sparsity/model_sparsity": 0.6004280137187403, "compression_loss": 68.99978637695312, "distillation_loss": 2.5563950538635254, "epoch": 2.88, "learning_rate": 3.95745280360665e-05, "loss": 71.7443, "step": 3403, "task_loss": 2.1134989261627197 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6450271831651275, "compression/movement_sparsity/importance_threshold": -0.0024861354772747106, "compression/movement_sparsity/linear_layer_sparsity": 0.6224237597339365, "compression/movement_sparsity/model_sparsity": 0.601041588787624, "compression_loss": 69.04474639892578, "distillation_loss": 1.8564252853393555, "epoch": 2.88, "learning_rate": 3.956983187752419e-05, "loss": 70.9939, "step": 3404, "task_loss": 1.93593168258667 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6454504416461636, "compression/movement_sparsity/importance_threshold": -0.0024831710871133913, "compression/movement_sparsity/linear_layer_sparsity": 0.6229165259614886, "compression/movement_sparsity/model_sparsity": 0.6015174269793334, "compression_loss": 69.08961486816406, "distillation_loss": 3.3086605072021484, "epoch": 2.88, "learning_rate": 3.9565135718981875e-05, "loss": 72.3267, "step": 3405, "task_loss": 1.7085344791412354 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6458733635399561, "compression/movement_sparsity/importance_threshold": -0.0024802090543198695, "compression/movement_sparsity/linear_layer_sparsity": 0.6234249724694818, "compression/movement_sparsity/model_sparsity": 0.6020084067856126, "compression_loss": 69.134521484375, "distillation_loss": 1.772336483001709, "epoch": 2.88, "learning_rate": 3.956043956043956e-05, "loss": 71.2518, "step": 3406, "task_loss": 0.9642235040664673 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6462959489803901, "compression/movement_sparsity/importance_threshold": -0.002477249377956449, "compression/movement_sparsity/linear_layer_sparsity": 0.6239062437994328, "compression/movement_sparsity/model_sparsity": 0.6024731449648161, "compression_loss": 69.17930603027344, "distillation_loss": 3.7423996925354004, "epoch": 2.88, "learning_rate": 3.955574340189725e-05, "loss": 71.8873, "step": 3407, "task_loss": 1.615133285522461 }, { "compression/movement_sparsity/importance_regularization_factor": 0.646718198101351, "compression/movement_sparsity/importance_threshold": -0.002474292057085434, "compression/movement_sparsity/linear_layer_sparsity": 0.6245149367847407, "compression/movement_sparsity/model_sparsity": 0.60306092747352, "compression_loss": 69.22412109375, "distillation_loss": 2.753981113433838, "epoch": 2.88, "learning_rate": 3.955104724335494e-05, "loss": 71.9212, "step": 3408, "task_loss": 1.4958302974700928 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6471401110367236, "compression/movement_sparsity/importance_threshold": -0.002471337090769129, "compression/movement_sparsity/linear_layer_sparsity": 0.6250927938725422, "compression/movement_sparsity/model_sparsity": 0.6036189333926593, "compression_loss": 69.26885223388672, "distillation_loss": 2.8000693321228027, "epoch": 2.88, "learning_rate": 3.954635108481263e-05, "loss": 72.1132, "step": 3409, "task_loss": 1.432848334312439 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6475616879203937, "compression/movement_sparsity/importance_threshold": -0.0024683844780698343, "compression/movement_sparsity/linear_layer_sparsity": 0.625718979611772, "compression/movement_sparsity/model_sparsity": 0.6042236077253738, "compression_loss": 69.31355285644531, "distillation_loss": 2.859842300415039, "epoch": 2.88, "learning_rate": 3.954165492627031e-05, "loss": 71.5616, "step": 3410, "task_loss": 1.4489312171936035 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6479829288862455, "compression/movement_sparsity/importance_threshold": -0.0024654342180498593, "compression/movement_sparsity/linear_layer_sparsity": 0.6262631986226728, "compression/movement_sparsity/model_sparsity": 0.6047491311390366, "compression_loss": 69.35826110839844, "distillation_loss": 2.56192684173584, "epoch": 2.88, "learning_rate": 3.9536958767728e-05, "loss": 72.3112, "step": 3411, "task_loss": 1.698162317276001 }, { "compression/movement_sparsity/importance_regularization_factor": 0.648403834068165, "compression/movement_sparsity/importance_threshold": -0.0024624863097715034, "compression/movement_sparsity/linear_layer_sparsity": 0.6266775276755161, "compression/movement_sparsity/model_sparsity": 0.6051492267142896, "compression_loss": 69.40292358398438, "distillation_loss": 3.5586342811584473, "epoch": 2.88, "learning_rate": 3.9532262609185686e-05, "loss": 72.0861, "step": 3412, "task_loss": 2.356511354446411 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6488244036000367, "compression/movement_sparsity/importance_threshold": -0.002459540752297075, "compression/movement_sparsity/linear_layer_sparsity": 0.6272804374395207, "compression/movement_sparsity/model_sparsity": 0.6057314246731331, "compression_loss": 69.4474868774414, "distillation_loss": 2.0453691482543945, "epoch": 2.88, "learning_rate": 3.952756645064338e-05, "loss": 72.2958, "step": 3413, "task_loss": 1.8191465139389038 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6492446376157464, "compression/movement_sparsity/importance_threshold": -0.0024565975446888726, "compression/movement_sparsity/linear_layer_sparsity": 0.6277815744327531, "compression/movement_sparsity/model_sparsity": 0.6062153460689703, "compression_loss": 69.49214172363281, "distillation_loss": 3.582063674926758, "epoch": 2.89, "learning_rate": 3.952287029210106e-05, "loss": 72.6939, "step": 3414, "task_loss": 3.2775070667266846 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6496645362491783, "compression/movement_sparsity/importance_threshold": -0.0024536566860092063, "compression/movement_sparsity/linear_layer_sparsity": 0.6282623091751605, "compression/movement_sparsity/model_sparsity": 0.606679566094063, "compression_loss": 69.53668212890625, "distillation_loss": 2.9792656898498535, "epoch": 2.89, "learning_rate": 3.951817413355875e-05, "loss": 72.6849, "step": 3415, "task_loss": 2.7068710327148438 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6500840996342183, "compression/movement_sparsity/importance_threshold": -0.0024507181753203764, "compression/movement_sparsity/linear_layer_sparsity": 0.6287962376293915, "compression/movement_sparsity/model_sparsity": 0.6071951524633351, "compression_loss": 69.5811996459961, "distillation_loss": 2.6012816429138184, "epoch": 2.89, "learning_rate": 3.951347797501644e-05, "loss": 71.9634, "step": 3416, "task_loss": 0.9923920035362244 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6505033279047514, "compression/movement_sparsity/importance_threshold": -0.002447782011684687, "compression/movement_sparsity/linear_layer_sparsity": 0.6293147481348694, "compression/movement_sparsity/model_sparsity": 0.6076958505378249, "compression_loss": 69.62565612792969, "distillation_loss": 2.068847894668579, "epoch": 2.89, "learning_rate": 3.9508781816474125e-05, "loss": 72.1662, "step": 3417, "task_loss": 1.2652469873428345 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6509222211946627, "compression/movement_sparsity/importance_threshold": -0.002444848194164442, "compression/movement_sparsity/linear_layer_sparsity": 0.6298689119015785, "compression/movement_sparsity/model_sparsity": 0.6082309770743405, "compression_loss": 69.67008972167969, "distillation_loss": 4.21000862121582, "epoch": 2.89, "learning_rate": 3.950408565793182e-05, "loss": 72.5042, "step": 3418, "task_loss": 2.495651960372925 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6513407796378373, "compression/movement_sparsity/importance_threshold": -0.0024419167218219457, "compression/movement_sparsity/linear_layer_sparsity": 0.6303630136359057, "compression/movement_sparsity/model_sparsity": 0.6087081048940588, "compression_loss": 69.71454620361328, "distillation_loss": 2.989088535308838, "epoch": 2.89, "learning_rate": 3.94993894993895e-05, "loss": 72.5831, "step": 3419, "task_loss": 1.8758002519607544 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6517590033681602, "compression/movement_sparsity/importance_threshold": -0.002438987593719504, "compression/movement_sparsity/linear_layer_sparsity": 0.6308652238042254, "compression/movement_sparsity/model_sparsity": 0.6091930625981176, "compression_loss": 69.75894165039062, "distillation_loss": 3.083568572998047, "epoch": 2.89, "learning_rate": 3.949469334084719e-05, "loss": 72.4935, "step": 3420, "task_loss": 2.7684543132781982 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6521768925195169, "compression/movement_sparsity/importance_threshold": -0.002436060808919418, "compression/movement_sparsity/linear_layer_sparsity": 0.6313944064397373, "compression/movement_sparsity/model_sparsity": 0.6097040661821435, "compression_loss": 69.80332946777344, "distillation_loss": 2.5045700073242188, "epoch": 2.89, "learning_rate": 3.948999718230488e-05, "loss": 71.8744, "step": 3421, "task_loss": 2.545727014541626 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6525944472257921, "compression/movement_sparsity/importance_threshold": -0.002433136366483995, "compression/movement_sparsity/linear_layer_sparsity": 0.631853522666221, "compression/movement_sparsity/model_sparsity": 0.6101474103538407, "compression_loss": 69.84757995605469, "distillation_loss": 4.963435649871826, "epoch": 2.89, "learning_rate": 3.948530102376257e-05, "loss": 73.1349, "step": 3422, "task_loss": 2.430263042449951 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6530116676208715, "compression/movement_sparsity/importance_threshold": -0.002430214265475533, "compression/movement_sparsity/linear_layer_sparsity": 0.6323447983728185, "compression/movement_sparsity/model_sparsity": 0.6106218092285758, "compression_loss": 69.89185333251953, "distillation_loss": 3.4002950191497803, "epoch": 2.89, "learning_rate": 3.948060486522025e-05, "loss": 72.458, "step": 3423, "task_loss": 1.3298418521881104 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6534285538386396, "compression/movement_sparsity/importance_threshold": -0.002427294504956343, "compression/movement_sparsity/linear_layer_sparsity": 0.6329355335616669, "compression/movement_sparsity/model_sparsity": 0.6111922508463731, "compression_loss": 69.93608856201172, "distillation_loss": 2.6990725994110107, "epoch": 2.89, "learning_rate": 3.9475908706677936e-05, "loss": 72.7111, "step": 3424, "task_loss": 1.5069451332092285 }, { "compression/movement_sparsity/importance_regularization_factor": 0.653845106012982, "compression/movement_sparsity/importance_threshold": -0.002424377083988725, "compression/movement_sparsity/linear_layer_sparsity": 0.6333433877914839, "compression/movement_sparsity/model_sparsity": 0.6115860940286896, "compression_loss": 69.98027038574219, "distillation_loss": 4.127119541168213, "epoch": 2.89, "learning_rate": 3.947121254813563e-05, "loss": 72.7007, "step": 3425, "task_loss": 3.101025342941284 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6542613242777837, "compression/movement_sparsity/importance_threshold": -0.002421462001634983, "compression/movement_sparsity/linear_layer_sparsity": 0.6338915060052016, "compression/movement_sparsity/model_sparsity": 0.6121153826955572, "compression_loss": 70.0244369506836, "distillation_loss": 3.782341718673706, "epoch": 2.9, "learning_rate": 3.9466516389593315e-05, "loss": 73.1741, "step": 3426, "task_loss": 2.8183016777038574 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6546772087669298, "compression/movement_sparsity/importance_threshold": -0.002418549256957423, "compression/movement_sparsity/linear_layer_sparsity": 0.6343312573834446, "compression/movement_sparsity/model_sparsity": 0.6125400272611241, "compression_loss": 70.06854248046875, "distillation_loss": 2.5417325496673584, "epoch": 2.9, "learning_rate": 3.9461820231051e-05, "loss": 72.8555, "step": 3427, "task_loss": 1.1935113668441772 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6550927596143055, "compression/movement_sparsity/importance_threshold": -0.0024156388490183473, "compression/movement_sparsity/linear_layer_sparsity": 0.6348840975675462, "compression/movement_sparsity/model_sparsity": 0.6130738756841664, "compression_loss": 70.11267852783203, "distillation_loss": 1.734518051147461, "epoch": 2.9, "learning_rate": 3.945712407250869e-05, "loss": 72.4502, "step": 3428, "task_loss": 0.9092952609062195 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6555079769537958, "compression/movement_sparsity/importance_threshold": -0.0024127307768800612, "compression/movement_sparsity/linear_layer_sparsity": 0.6353808345429209, "compression/movement_sparsity/model_sparsity": 0.6135535482162954, "compression_loss": 70.15677642822266, "distillation_loss": 3.5225138664245605, "epoch": 2.9, "learning_rate": 3.945242791396638e-05, "loss": 73.2771, "step": 3429, "task_loss": 2.2300021648406982 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6559228609192862, "compression/movement_sparsity/importance_threshold": -0.0024098250396048662, "compression/movement_sparsity/linear_layer_sparsity": 0.635898975399202, "compression/movement_sparsity/model_sparsity": 0.6140538893401756, "compression_loss": 70.20085906982422, "distillation_loss": 1.5398763418197632, "epoch": 2.9, "learning_rate": 3.944773175542407e-05, "loss": 72.5097, "step": 3430, "task_loss": 1.2171375751495361 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6563374116446614, "compression/movement_sparsity/importance_threshold": -0.002406921636255069, "compression/movement_sparsity/linear_layer_sparsity": 0.636269208880128, "compression/movement_sparsity/model_sparsity": 0.6144114041620602, "compression_loss": 70.24490356445312, "distillation_loss": 3.341099977493286, "epoch": 2.9, "learning_rate": 3.944303559688175e-05, "loss": 72.9376, "step": 3431, "task_loss": 1.224884033203125 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6567516292638069, "compression/movement_sparsity/importance_threshold": -0.002404020565892971, "compression/movement_sparsity/linear_layer_sparsity": 0.6368179352263951, "compression/movement_sparsity/model_sparsity": 0.6149412800702534, "compression_loss": 70.28887176513672, "distillation_loss": 2.9473936557769775, "epoch": 2.9, "learning_rate": 3.943833943833944e-05, "loss": 73.5411, "step": 3432, "task_loss": 2.231801986694336 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6571655139106075, "compression/movement_sparsity/importance_threshold": -0.0024011218275808797, "compression/movement_sparsity/linear_layer_sparsity": 0.6372782438696424, "compression/movement_sparsity/model_sparsity": 0.6153857756955301, "compression_loss": 70.33283996582031, "distillation_loss": 3.4031615257263184, "epoch": 2.9, "learning_rate": 3.9433643279797126e-05, "loss": 73.041, "step": 3433, "task_loss": 1.198292851448059 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6575790657189489, "compression/movement_sparsity/importance_threshold": -0.002398225420381095, "compression/movement_sparsity/linear_layer_sparsity": 0.6376679852888205, "compression/movement_sparsity/model_sparsity": 0.6157621282979746, "compression_loss": 70.37678527832031, "distillation_loss": 2.947099447250366, "epoch": 2.9, "learning_rate": 3.942894712125482e-05, "loss": 72.7191, "step": 3434, "task_loss": 2.070704936981201 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6579922848227153, "compression/movement_sparsity/importance_threshold": -0.002395331343355926, "compression/movement_sparsity/linear_layer_sparsity": 0.6380450393936341, "compression/movement_sparsity/model_sparsity": 0.6161262294343338, "compression_loss": 70.42066955566406, "distillation_loss": 3.0444676876068115, "epoch": 2.9, "learning_rate": 3.9424250962712506e-05, "loss": 73.0662, "step": 3435, "task_loss": 2.250734329223633 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6584051713557927, "compression/movement_sparsity/importance_threshold": -0.0023924395955676712, "compression/movement_sparsity/linear_layer_sparsity": 0.6386006936812977, "compression/movement_sparsity/model_sparsity": 0.6166627952878236, "compression_loss": 70.4645004272461, "distillation_loss": 3.1259472370147705, "epoch": 2.9, "learning_rate": 3.941955480417019e-05, "loss": 72.706, "step": 3436, "task_loss": 1.9956539869308472 }, { "compression/movement_sparsity/importance_regularization_factor": 0.658817725452066, "compression/movement_sparsity/importance_threshold": -0.0023895501760786376, "compression/movement_sparsity/linear_layer_sparsity": 0.6391136475246573, "compression/movement_sparsity/model_sparsity": 0.6171581275886332, "compression_loss": 70.50831604003906, "distillation_loss": 1.299185037612915, "epoch": 2.9, "learning_rate": 3.941485864562788e-05, "loss": 73.142, "step": 3437, "task_loss": 0.844789445400238 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6592299472454202, "compression/movement_sparsity/importance_threshold": -0.002386663083951128, "compression/movement_sparsity/linear_layer_sparsity": 0.6395717382727242, "compression/movement_sparsity/model_sparsity": 0.617600481510252, "compression_loss": 70.55211639404297, "distillation_loss": 1.9725441932678223, "epoch": 2.91, "learning_rate": 3.9410162487085565e-05, "loss": 72.8861, "step": 3438, "task_loss": 1.4636355638504028 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6596418368697405, "compression/movement_sparsity/importance_threshold": -0.002383778318247447, "compression/movement_sparsity/linear_layer_sparsity": 0.6399490666333935, "compression/movement_sparsity/model_sparsity": 0.6179648474809345, "compression_loss": 70.59585571289062, "distillation_loss": 2.825838327407837, "epoch": 2.91, "learning_rate": 3.940546632854326e-05, "loss": 72.5972, "step": 3439, "task_loss": 4.363339900970459 }, { "compression/movement_sparsity/importance_regularization_factor": 0.660053394458912, "compression/movement_sparsity/importance_threshold": -0.0023808958780299006, "compression/movement_sparsity/linear_layer_sparsity": 0.6404604464866251, "compression/movement_sparsity/model_sparsity": 0.6184586598630192, "compression_loss": 70.63953399658203, "distillation_loss": 2.800205707550049, "epoch": 2.91, "learning_rate": 3.940077017000094e-05, "loss": 73.1447, "step": 3440, "task_loss": 1.7196780443191528 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6604646201468201, "compression/movement_sparsity/importance_threshold": -0.002378015762360788, "compression/movement_sparsity/linear_layer_sparsity": 0.6409308548998599, "compression/movement_sparsity/model_sparsity": 0.6189129083001138, "compression_loss": 70.68316650390625, "distillation_loss": 3.213515520095825, "epoch": 2.91, "learning_rate": 3.939607401145863e-05, "loss": 73.4888, "step": 3441, "task_loss": 2.6564600467681885 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6608755140673495, "compression/movement_sparsity/importance_threshold": -0.0023751379703024185, "compression/movement_sparsity/linear_layer_sparsity": 0.641431216822196, "compression/movement_sparsity/model_sparsity": 0.6193960812511243, "compression_loss": 70.72674560546875, "distillation_loss": 1.7173619270324707, "epoch": 2.91, "learning_rate": 3.939137785291632e-05, "loss": 73.1429, "step": 3442, "task_loss": 0.6602032780647278 }, { "compression/movement_sparsity/importance_regularization_factor": 0.661286076354386, "compression/movement_sparsity/importance_threshold": -0.00237226250091709, "compression/movement_sparsity/linear_layer_sparsity": 0.6420269840097866, "compression/movement_sparsity/model_sparsity": 0.6199713820030269, "compression_loss": 70.770263671875, "distillation_loss": 3.48776912689209, "epoch": 2.91, "learning_rate": 3.9386681694374e-05, "loss": 73.933, "step": 3443, "task_loss": 1.7559863328933716 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6616963071418138, "compression/movement_sparsity/importance_threshold": -0.002369389353267113, "compression/movement_sparsity/linear_layer_sparsity": 0.6425734328400353, "compression/movement_sparsity/model_sparsity": 0.6204990586348834, "compression_loss": 70.81375885009766, "distillation_loss": 2.4312257766723633, "epoch": 2.91, "learning_rate": 3.938198553583169e-05, "loss": 73.2838, "step": 3444, "task_loss": 1.474777340888977 }, { "compression/movement_sparsity/importance_regularization_factor": 0.662106206563519, "compression/movement_sparsity/importance_threshold": -0.0023665185264147856, "compression/movement_sparsity/linear_layer_sparsity": 0.6430591876570174, "compression/movement_sparsity/model_sparsity": 0.6209681262795456, "compression_loss": 70.85722351074219, "distillation_loss": 2.045888900756836, "epoch": 2.91, "learning_rate": 3.9377289377289376e-05, "loss": 74.4556, "step": 3445, "task_loss": 1.9601024389266968 }, { "compression/movement_sparsity/importance_regularization_factor": 0.662515774753386, "compression/movement_sparsity/importance_threshold": -0.002363650019422418, "compression/movement_sparsity/linear_layer_sparsity": 0.6435720341828682, "compression/movement_sparsity/model_sparsity": 0.621463354949533, "compression_loss": 70.90070343017578, "distillation_loss": 3.6094117164611816, "epoch": 2.91, "learning_rate": 3.937259321874707e-05, "loss": 73.9896, "step": 3446, "task_loss": 2.087212324142456 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6629250118453004, "compression/movement_sparsity/importance_threshold": -0.002360783831352309, "compression/movement_sparsity/linear_layer_sparsity": 0.6440765934122121, "compression/movement_sparsity/model_sparsity": 0.6219505810171432, "compression_loss": 70.94407653808594, "distillation_loss": 3.968432664871216, "epoch": 2.91, "learning_rate": 3.9367897060204755e-05, "loss": 73.4152, "step": 3447, "task_loss": 2.3962337970733643 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6633339179731472, "compression/movement_sparsity/importance_threshold": -0.002357919961266764, "compression/movement_sparsity/linear_layer_sparsity": 0.6445810453240474, "compression/movement_sparsity/model_sparsity": 0.6224377034539312, "compression_loss": 70.98741149902344, "distillation_loss": 3.249356746673584, "epoch": 2.91, "learning_rate": 3.936320090166244e-05, "loss": 73.7963, "step": 3448, "task_loss": 1.4402354955673218 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6637424932708114, "compression/movement_sparsity/importance_threshold": -0.0023550584082280886, "compression/movement_sparsity/linear_layer_sparsity": 0.6450214883040133, "compression/movement_sparsity/model_sparsity": 0.6228630158625742, "compression_loss": 71.03071594238281, "distillation_loss": 4.58958625793457, "epoch": 2.91, "learning_rate": 3.935850474312013e-05, "loss": 74.2047, "step": 3449, "task_loss": 2.3297276496887207 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6641507378721785, "compression/movement_sparsity/importance_threshold": -0.002352199171298584, "compression/movement_sparsity/linear_layer_sparsity": 0.6455775360892088, "compression/movement_sparsity/model_sparsity": 0.6233999616957452, "compression_loss": 71.07402801513672, "distillation_loss": 3.513432025909424, "epoch": 2.92, "learning_rate": 3.9353808584577814e-05, "loss": 73.5117, "step": 3450, "task_loss": 1.4783369302749634 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6645586519111331, "compression/movement_sparsity/importance_threshold": -0.0023493422495405574, "compression/movement_sparsity/linear_layer_sparsity": 0.6459419744246637, "compression/movement_sparsity/model_sparsity": 0.6237518804532338, "compression_loss": 71.1172866821289, "distillation_loss": 3.0493850708007812, "epoch": 2.92, "learning_rate": 3.934911242603551e-05, "loss": 74.1033, "step": 3451, "task_loss": 2.0505120754241943 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6649662355215611, "compression/movement_sparsity/importance_threshold": -0.0023464876420163077, "compression/movement_sparsity/linear_layer_sparsity": 0.6464074104599943, "compression/movement_sparsity/model_sparsity": 0.6242013273289021, "compression_loss": 71.16053009033203, "distillation_loss": 2.648249387741089, "epoch": 2.92, "learning_rate": 3.9344416267493194e-05, "loss": 74.2024, "step": 3452, "task_loss": 1.5979214906692505 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6653734888373467, "compression/movement_sparsity/importance_threshold": -0.0023436353477881465, "compression/movement_sparsity/linear_layer_sparsity": 0.646936962744703, "compression/movement_sparsity/model_sparsity": 0.6247126878635376, "compression_loss": 71.20372772216797, "distillation_loss": 1.356490135192871, "epoch": 2.92, "learning_rate": 3.933972010895088e-05, "loss": 73.8793, "step": 3453, "task_loss": 1.0951056480407715 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6657804119923759, "compression/movement_sparsity/importance_threshold": -0.002340785365918371, "compression/movement_sparsity/linear_layer_sparsity": 0.6474527903124628, "compression/movement_sparsity/model_sparsity": 0.6252107951674737, "compression_loss": 71.24679565429688, "distillation_loss": 3.1165103912353516, "epoch": 2.92, "learning_rate": 3.9335023950408567e-05, "loss": 74.1133, "step": 3454, "task_loss": 2.2923457622528076 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6661870051205332, "compression/movement_sparsity/importance_threshold": -0.002337937695469289, "compression/movement_sparsity/linear_layer_sparsity": 0.6479854190324216, "compression/movement_sparsity/model_sparsity": 0.6257251264523442, "compression_loss": 71.28990936279297, "distillation_loss": 3.222261428833008, "epoch": 2.92, "learning_rate": 3.933032779186625e-05, "loss": 74.6906, "step": 3455, "task_loss": 2.9152517318725586 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6665932683557042, "compression/movement_sparsity/importance_threshold": -0.002335092335503202, "compression/movement_sparsity/linear_layer_sparsity": 0.6484795684634194, "compression/movement_sparsity/model_sparsity": 0.6262023003302059, "compression_loss": 71.33300018310547, "distillation_loss": 2.3612823486328125, "epoch": 2.92, "learning_rate": 3.9325631633323946e-05, "loss": 73.6173, "step": 3456, "task_loss": 2.113659381866455 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6669992018317739, "compression/movement_sparsity/importance_threshold": -0.002332249285082416, "compression/movement_sparsity/linear_layer_sparsity": 0.6487986830376903, "compression/movement_sparsity/model_sparsity": 0.6265104523371393, "compression_loss": 71.37603759765625, "distillation_loss": 3.4456934928894043, "epoch": 2.92, "learning_rate": 3.9320935474781626e-05, "loss": 74.3593, "step": 3457, "task_loss": 1.8562883138656616 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6674048056826272, "compression/movement_sparsity/importance_threshold": -0.002329408543269234, "compression/movement_sparsity/linear_layer_sparsity": 0.6493443925695456, "compression/movement_sparsity/model_sparsity": 0.6270374150677764, "compression_loss": 71.41908264160156, "distillation_loss": 3.216428279876709, "epoch": 2.92, "learning_rate": 3.931623931623932e-05, "loss": 74.2239, "step": 3458, "task_loss": 2.017503261566162 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6678100800421496, "compression/movement_sparsity/importance_threshold": -0.002326570109125959, "compression/movement_sparsity/linear_layer_sparsity": 0.6498514439499253, "compression/movement_sparsity/model_sparsity": 0.6275270476733678, "compression_loss": 71.4620590209961, "distillation_loss": 2.425304412841797, "epoch": 2.92, "learning_rate": 3.9311543157697005e-05, "loss": 74.193, "step": 3459, "task_loss": 1.093080997467041 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6682150250442258, "compression/movement_sparsity/importance_threshold": -0.0023237339817148983, "compression/movement_sparsity/linear_layer_sparsity": 0.6502760635594385, "compression/movement_sparsity/model_sparsity": 0.6279370802930113, "compression_loss": 71.50495147705078, "distillation_loss": 2.074154853820801, "epoch": 2.92, "learning_rate": 3.93068469991547e-05, "loss": 73.8348, "step": 3460, "task_loss": 1.9899340867996216 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6686196408227416, "compression/movement_sparsity/importance_threshold": -0.002320900160098352, "compression/movement_sparsity/linear_layer_sparsity": 0.6507725978239635, "compression/movement_sparsity/model_sparsity": 0.6284165570780318, "compression_loss": 71.54790496826172, "distillation_loss": 1.9942905902862549, "epoch": 2.93, "learning_rate": 3.930215084061238e-05, "loss": 73.8606, "step": 3461, "task_loss": 1.4400956630706787 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6690239275115815, "compression/movement_sparsity/importance_threshold": -0.0023180686433386267, "compression/movement_sparsity/linear_layer_sparsity": 0.6511622915464711, "compression/movement_sparsity/model_sparsity": 0.6287928636223332, "compression_loss": 71.59075927734375, "distillation_loss": 1.6505866050720215, "epoch": 2.93, "learning_rate": 3.9297454682070064e-05, "loss": 74.2581, "step": 3462, "task_loss": 1.370280385017395 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6694278852446311, "compression/movement_sparsity/importance_threshold": -0.0023152394304980237, "compression/movement_sparsity/linear_layer_sparsity": 0.6516874318891545, "compression/movement_sparsity/model_sparsity": 0.6292999637787248, "compression_loss": 71.63359069824219, "distillation_loss": 1.883472204208374, "epoch": 2.93, "learning_rate": 3.929275852352776e-05, "loss": 74.1864, "step": 3463, "task_loss": 0.7973963618278503 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6698315141557751, "compression/movement_sparsity/importance_threshold": -0.0023124125206388513, "compression/movement_sparsity/linear_layer_sparsity": 0.6521459757555915, "compression/movement_sparsity/model_sparsity": 0.6297427552527038, "compression_loss": 71.67642974853516, "distillation_loss": 1.860454797744751, "epoch": 2.93, "learning_rate": 3.9288062364985443e-05, "loss": 73.6535, "step": 3464, "task_loss": 1.5932941436767578 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6702348143788992, "compression/movement_sparsity/importance_threshold": -0.002309587912823408, "compression/movement_sparsity/linear_layer_sparsity": 0.6527133634242088, "compression/movement_sparsity/model_sparsity": 0.6302906514094154, "compression_loss": 71.71917724609375, "distillation_loss": 3.1045479774475098, "epoch": 2.93, "learning_rate": 3.9283366206443137e-05, "loss": 73.9588, "step": 3465, "task_loss": 1.8902684450149536 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6706377860478878, "compression/movement_sparsity/importance_threshold": -0.0023067656061140033, "compression/movement_sparsity/linear_layer_sparsity": 0.653129338012186, "compression/movement_sparsity/model_sparsity": 0.630692335990608, "compression_loss": 71.76187133789062, "distillation_loss": 2.717440128326416, "epoch": 2.93, "learning_rate": 3.9278670047900816e-05, "loss": 74.422, "step": 3466, "task_loss": 1.708390474319458 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6710404292966268, "compression/movement_sparsity/importance_threshold": -0.0023039455995729373, "compression/movement_sparsity/linear_layer_sparsity": 0.6534962446503414, "compression/movement_sparsity/model_sparsity": 0.631046638257006, "compression_loss": 71.80453491210938, "distillation_loss": 2.693601369857788, "epoch": 2.93, "learning_rate": 3.927397388935851e-05, "loss": 74.4821, "step": 3467, "task_loss": 1.571959137916565 }, { "compression/movement_sparsity/importance_regularization_factor": 0.671442744259001, "compression/movement_sparsity/importance_threshold": -0.0023011278922625148, "compression/movement_sparsity/linear_layer_sparsity": 0.6539202561273051, "compression/movement_sparsity/model_sparsity": 0.6314560836353241, "compression_loss": 71.84719848632812, "distillation_loss": 2.535973072052002, "epoch": 2.93, "learning_rate": 3.9269277730816196e-05, "loss": 74.7812, "step": 3468, "task_loss": 1.4639322757720947 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6718447310688952, "compression/movement_sparsity/importance_threshold": -0.0022983124832450423, "compression/movement_sparsity/linear_layer_sparsity": 0.654498888286003, "compression/movement_sparsity/model_sparsity": 0.63201483799929, "compression_loss": 71.8897933959961, "distillation_loss": 2.520115375518799, "epoch": 2.93, "learning_rate": 3.926458157227388e-05, "loss": 74.273, "step": 3469, "task_loss": 1.4775841236114502 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6722463898601954, "compression/movement_sparsity/importance_threshold": -0.0022954993715828187, "compression/movement_sparsity/linear_layer_sparsity": 0.654968664718353, "compression/movement_sparsity/model_sparsity": 0.6324684761659876, "compression_loss": 71.93238067626953, "distillation_loss": 3.3124871253967285, "epoch": 2.93, "learning_rate": 3.925988541373157e-05, "loss": 74.7481, "step": 3470, "task_loss": 1.8966056108474731 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6726477207667857, "compression/movement_sparsity/importance_threshold": -0.0022926885563381537, "compression/movement_sparsity/linear_layer_sparsity": 0.6553491649076134, "compression/movement_sparsity/model_sparsity": 0.6328359050031913, "compression_loss": 71.97491455078125, "distillation_loss": 1.986694574356079, "epoch": 2.93, "learning_rate": 3.9255189255189255e-05, "loss": 74.3775, "step": 3471, "task_loss": 1.648299217224121 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6730487239225522, "compression/movement_sparsity/importance_threshold": -0.002289880036573346, "compression/movement_sparsity/linear_layer_sparsity": 0.6558457826413119, "compression/movement_sparsity/model_sparsity": 0.6333154623899624, "compression_loss": 72.01742553710938, "distillation_loss": 2.9936611652374268, "epoch": 2.93, "learning_rate": 3.925049309664695e-05, "loss": 75.2096, "step": 3472, "task_loss": 2.2089285850524902 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6734493994613793, "compression/movement_sparsity/importance_threshold": -0.002287073811350704, "compression/movement_sparsity/linear_layer_sparsity": 0.6562686732465177, "compression/movement_sparsity/model_sparsity": 0.6337238254019157, "compression_loss": 72.05985260009766, "distillation_loss": 1.813371181488037, "epoch": 2.94, "learning_rate": 3.9245796938104634e-05, "loss": 74.6188, "step": 3473, "task_loss": 1.2873101234436035 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6738497475171527, "compression/movement_sparsity/importance_threshold": -0.0022842698797325284, "compression/movement_sparsity/linear_layer_sparsity": 0.6568422734064733, "compression/movement_sparsity/model_sparsity": 0.6342777206317763, "compression_loss": 72.10228729248047, "distillation_loss": 3.282653331756592, "epoch": 2.94, "learning_rate": 3.924110077956232e-05, "loss": 75.2994, "step": 3474, "task_loss": 1.5861778259277344 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6742497682237569, "compression/movement_sparsity/importance_threshold": -0.002281468240781127, "compression/movement_sparsity/linear_layer_sparsity": 0.6572695878778722, "compression/movement_sparsity/model_sparsity": 0.6346903555365095, "compression_loss": 72.14468383789062, "distillation_loss": 3.0566153526306152, "epoch": 2.94, "learning_rate": 3.923640462102001e-05, "loss": 74.8987, "step": 3475, "task_loss": 2.2819125652313232 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6746494617150778, "compression/movement_sparsity/importance_threshold": -0.002278668893558799, "compression/movement_sparsity/linear_layer_sparsity": 0.6577697351651908, "compression/movement_sparsity/model_sparsity": 0.6351733212258757, "compression_loss": 72.18702697753906, "distillation_loss": 3.2139034271240234, "epoch": 2.94, "learning_rate": 3.923170846247769e-05, "loss": 74.7968, "step": 3476, "task_loss": 1.3146706819534302 }, { "compression/movement_sparsity/importance_regularization_factor": 0.675048828125, "compression/movement_sparsity/importance_threshold": -0.002275871837127852, "compression/movement_sparsity/linear_layer_sparsity": 0.6582559669488783, "compression/movement_sparsity/model_sparsity": 0.6356428494519697, "compression_loss": 72.22935485839844, "distillation_loss": 2.0855443477630615, "epoch": 2.94, "learning_rate": 3.9227012303935386e-05, "loss": 74.7817, "step": 3477, "task_loss": 1.0672084093093872 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6754478675874089, "compression/movement_sparsity/importance_threshold": -0.002273077070550587, "compression/movement_sparsity/linear_layer_sparsity": 0.6587030039935469, "compression/movement_sparsity/model_sparsity": 0.6360745293989071, "compression_loss": 72.2716064453125, "distillation_loss": 2.6618552207946777, "epoch": 2.94, "learning_rate": 3.9222316145393066e-05, "loss": 75.107, "step": 3478, "task_loss": 1.771591305732727 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6758465802361895, "compression/movement_sparsity/importance_threshold": -0.0022702845928893114, "compression/movement_sparsity/linear_layer_sparsity": 0.6591643381152107, "compression/movement_sparsity/model_sparsity": 0.636520015274262, "compression_loss": 72.31391906738281, "distillation_loss": 2.6960034370422363, "epoch": 2.94, "learning_rate": 3.921761998685076e-05, "loss": 74.866, "step": 3479, "task_loss": 1.9516239166259766 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6762449662052268, "compression/movement_sparsity/importance_threshold": -0.0022674944032063287, "compression/movement_sparsity/linear_layer_sparsity": 0.6596007984231862, "compression/movement_sparsity/model_sparsity": 0.6369414818279496, "compression_loss": 72.35610961914062, "distillation_loss": 3.267400026321411, "epoch": 2.94, "learning_rate": 3.9212923828308445e-05, "loss": 74.549, "step": 3480, "task_loss": 2.1141860485076904 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6766430256284065, "compression/movement_sparsity/importance_threshold": -0.0022647065005639384, "compression/movement_sparsity/linear_layer_sparsity": 0.660014292784295, "compression/movement_sparsity/model_sparsity": 0.6373407713856969, "compression_loss": 72.3982925415039, "distillation_loss": 4.121175289154053, "epoch": 2.94, "learning_rate": 3.920822766976613e-05, "loss": 75.1532, "step": 3481, "task_loss": 2.6890432834625244 }, { "compression/movement_sparsity/importance_regularization_factor": 0.677040758639613, "compression/movement_sparsity/importance_threshold": -0.0022619208840244506, "compression/movement_sparsity/linear_layer_sparsity": 0.66050336251988, "compression/movement_sparsity/model_sparsity": 0.63781304007131, "compression_loss": 72.44044494628906, "distillation_loss": 2.5138580799102783, "epoch": 2.94, "learning_rate": 3.9203531511223825e-05, "loss": 74.9983, "step": 3482, "task_loss": 1.8378770351409912 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6774381653727322, "compression/movement_sparsity/importance_threshold": -0.002259137552650164, "compression/movement_sparsity/linear_layer_sparsity": 0.661125935236316, "compression/movement_sparsity/model_sparsity": 0.6384142254996787, "compression_loss": 72.4825210571289, "distillation_loss": 3.3643221855163574, "epoch": 2.94, "learning_rate": 3.9198835352681504e-05, "loss": 75.5401, "step": 3483, "task_loss": 1.4657042026519775 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6778352459616486, "compression/movement_sparsity/importance_threshold": -0.0022563565055033867, "compression/movement_sparsity/linear_layer_sparsity": 0.6614776504849035, "compression/movement_sparsity/model_sparsity": 0.6387538582474745, "compression_loss": 72.52464294433594, "distillation_loss": 3.1803340911865234, "epoch": 2.94, "learning_rate": 3.91941391941392e-05, "loss": 75.2395, "step": 3484, "task_loss": 1.7452113628387451 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6782320005402478, "compression/movement_sparsity/importance_threshold": -0.0022535777416464193, "compression/movement_sparsity/linear_layer_sparsity": 0.6619202636633791, "compression/movement_sparsity/model_sparsity": 0.6391812663016321, "compression_loss": 72.56664276123047, "distillation_loss": 3.8766181468963623, "epoch": 2.95, "learning_rate": 3.9189443035596884e-05, "loss": 75.2579, "step": 3485, "task_loss": 2.7921416759490967 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6786284292424143, "compression/movement_sparsity/importance_threshold": -0.002250801260141571, "compression/movement_sparsity/linear_layer_sparsity": 0.6623848292344723, "compression/movement_sparsity/model_sparsity": 0.6396298726161874, "compression_loss": 72.60862731933594, "distillation_loss": 1.689774513244629, "epoch": 2.95, "learning_rate": 3.918474687705457e-05, "loss": 75.1176, "step": 3486, "task_loss": 1.2145459651947021 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6790245322020341, "compression/movement_sparsity/importance_threshold": -0.002248027060051138, "compression/movement_sparsity/linear_layer_sparsity": 0.6628492755638892, "compression/movement_sparsity/model_sparsity": 0.6400783637853847, "compression_loss": 72.65055847167969, "distillation_loss": 2.478976011276245, "epoch": 2.95, "learning_rate": 3.9180050718512256e-05, "loss": 75.03, "step": 3487, "task_loss": 2.0646066665649414 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6794203095529917, "compression/movement_sparsity/importance_threshold": -0.0022452551404374306, "compression/movement_sparsity/linear_layer_sparsity": 0.6632070602138032, "compression/movement_sparsity/model_sparsity": 0.6404238574319, "compression_loss": 72.69247436523438, "distillation_loss": 2.3888416290283203, "epoch": 2.95, "learning_rate": 3.917535455996994e-05, "loss": 75.2002, "step": 3488, "task_loss": 1.6714826822280884 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6798157614291724, "compression/movement_sparsity/importance_threshold": -0.002242485500362751, "compression/movement_sparsity/linear_layer_sparsity": 0.6638220491596228, "compression/movement_sparsity/model_sparsity": 0.6410177196155034, "compression_loss": 72.7343521118164, "distillation_loss": 3.100355863571167, "epoch": 2.95, "learning_rate": 3.9170658401427636e-05, "loss": 75.6369, "step": 3489, "task_loss": 2.152700185775757 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6802108879644617, "compression/movement_sparsity/importance_threshold": -0.0022397181388894016, "compression/movement_sparsity/linear_layer_sparsity": 0.6644480321880027, "compression/movement_sparsity/model_sparsity": 0.6416221982011094, "compression_loss": 72.77621459960938, "distillation_loss": 3.2960681915283203, "epoch": 2.95, "learning_rate": 3.916596224288532e-05, "loss": 75.6987, "step": 3490, "task_loss": 1.9321776628494263 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6806056892927441, "compression/movement_sparsity/importance_threshold": -0.002236953055079689, "compression/movement_sparsity/linear_layer_sparsity": 0.6647963490488139, "compression/movement_sparsity/model_sparsity": 0.6419585493062038, "compression_loss": 72.81808471679688, "distillation_loss": 3.720386028289795, "epoch": 2.95, "learning_rate": 3.916126608434301e-05, "loss": 76.0369, "step": 3491, "task_loss": 2.1342122554779053 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6810001655479054, "compression/movement_sparsity/importance_threshold": -0.002234190247995914, "compression/movement_sparsity/linear_layer_sparsity": 0.6652792062930606, "compression/movement_sparsity/model_sparsity": 0.6424248189186679, "compression_loss": 72.85981750488281, "distillation_loss": 4.349209785461426, "epoch": 2.95, "learning_rate": 3.9156569925800695e-05, "loss": 76.289, "step": 3492, "task_loss": 1.3346827030181885 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6813943168638301, "compression/movement_sparsity/importance_threshold": -0.002231429716700385, "compression/movement_sparsity/linear_layer_sparsity": 0.6656481996605523, "compression/movement_sparsity/model_sparsity": 0.6427811362288299, "compression_loss": 72.9016342163086, "distillation_loss": 2.6415183544158936, "epoch": 2.95, "learning_rate": 3.915187376725839e-05, "loss": 75.8007, "step": 3493, "task_loss": 1.3116129636764526 }, { "compression/movement_sparsity/importance_regularization_factor": 0.681788143374404, "compression/movement_sparsity/importance_threshold": -0.0022286714602554004, "compression/movement_sparsity/linear_layer_sparsity": 0.6661606407647035, "compression/movement_sparsity/model_sparsity": 0.6432759734046003, "compression_loss": 72.943359375, "distillation_loss": 3.14553165435791, "epoch": 2.95, "learning_rate": 3.9147177608716074e-05, "loss": 75.9407, "step": 3494, "task_loss": 2.4717612266540527 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6821816452135114, "compression/movement_sparsity/importance_threshold": -0.00222591547772327, "compression/movement_sparsity/linear_layer_sparsity": 0.6666884401967698, "compression/movement_sparsity/model_sparsity": 0.6437856413024741, "compression_loss": 72.98501586914062, "distillation_loss": 2.3049697875976562, "epoch": 2.95, "learning_rate": 3.914248145017376e-05, "loss": 75.765, "step": 3495, "task_loss": 1.2274569272994995 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6825748225150383, "compression/movement_sparsity/importance_threshold": -0.002223161768166294, "compression/movement_sparsity/linear_layer_sparsity": 0.6671546274546615, "compression/movement_sparsity/model_sparsity": 0.6442358135938975, "compression_loss": 73.02664947509766, "distillation_loss": 5.13464879989624, "epoch": 2.95, "learning_rate": 3.913778529163145e-05, "loss": 76.3102, "step": 3496, "task_loss": 2.374159097671509 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6829676754128695, "compression/movement_sparsity/importance_threshold": -0.0022204103306467764, "compression/movement_sparsity/linear_layer_sparsity": 0.6676211128167441, "compression/movement_sparsity/model_sparsity": 0.6446862737487157, "compression_loss": 73.06830596923828, "distillation_loss": 3.086169719696045, "epoch": 2.96, "learning_rate": 3.913308913308913e-05, "loss": 76.0823, "step": 3497, "task_loss": 1.9866819381713867 }, { "compression/movement_sparsity/importance_regularization_factor": 0.68336020404089, "compression/movement_sparsity/importance_threshold": -0.0022176611642270227, "compression/movement_sparsity/linear_layer_sparsity": 0.6681135570917699, "compression/movement_sparsity/model_sparsity": 0.6451618010479586, "compression_loss": 73.10980224609375, "distillation_loss": 2.7235374450683594, "epoch": 2.96, "learning_rate": 3.9128392974546826e-05, "loss": 75.8763, "step": 3498, "task_loss": 2.1649608612060547 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6837524085329851, "compression/movement_sparsity/importance_threshold": -0.002214914267969336, "compression/movement_sparsity/linear_layer_sparsity": 0.6685471436753452, "compression/movement_sparsity/model_sparsity": 0.6455804925985197, "compression_loss": 73.15141296386719, "distillation_loss": 3.235076427459717, "epoch": 2.96, "learning_rate": 3.912369681600451e-05, "loss": 76.1227, "step": 3499, "task_loss": 2.4782118797302246 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6841442890230398, "compression/movement_sparsity/importance_threshold": -0.002212169640936022, "compression/movement_sparsity/linear_layer_sparsity": 0.6690345321032934, "compression/movement_sparsity/model_sparsity": 0.6460511377345858, "compression_loss": 73.19290161132812, "distillation_loss": 3.722426414489746, "epoch": 2.96, "learning_rate": 3.91190006574622e-05, "loss": 75.576, "step": 3500, "task_loss": 2.0049216747283936 }, { "epoch": 2.96, "eval_accuracy": 0.695089108910891, "eval_loss": 75.57647705078125, "eval_runtime": 227.9882, "eval_samples_per_second": 110.751, "eval_steps_per_second": 0.868, "step": 3500 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6845358456449396, "compression/movement_sparsity/importance_threshold": -0.0022094272821893803, "compression/movement_sparsity/linear_layer_sparsity": 0.6696490202340724, "compression/movement_sparsity/model_sparsity": 0.6466445163076858, "compression_loss": 73.23440551757812, "distillation_loss": 2.712944507598877, "epoch": 2.96, "learning_rate": 3.9114304498919885e-05, "loss": 76.3992, "step": 3501, "task_loss": 1.5083972215652466 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6849270785325692, "compression/movement_sparsity/importance_threshold": -0.00220668719079172, "compression/movement_sparsity/linear_layer_sparsity": 0.6700302477975585, "compression/movement_sparsity/model_sparsity": 0.6470126475315732, "compression_loss": 73.27590942382812, "distillation_loss": 3.7012975215911865, "epoch": 2.96, "learning_rate": 3.910960834037757e-05, "loss": 76.4017, "step": 3502, "task_loss": 2.568208694458008 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6853179878198141, "compression/movement_sparsity/importance_threshold": -0.002203949365805341, "compression/movement_sparsity/linear_layer_sparsity": 0.6703995750417441, "compression/movement_sparsity/model_sparsity": 0.6473692872487374, "compression_loss": 73.31735229492188, "distillation_loss": 3.4675464630126953, "epoch": 2.96, "learning_rate": 3.9104912181835265e-05, "loss": 76.1554, "step": 3503, "task_loss": 1.3452204465866089 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6857085736405589, "compression/movement_sparsity/importance_threshold": -0.0022012138062925523, "compression/movement_sparsity/linear_layer_sparsity": 0.6708060579922831, "compression/movement_sparsity/model_sparsity": 0.6477618062594376, "compression_loss": 73.35868835449219, "distillation_loss": 3.877480983734131, "epoch": 2.96, "learning_rate": 3.9100216023292944e-05, "loss": 76.3187, "step": 3504, "task_loss": 2.494652271270752 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6860988361286895, "compression/movement_sparsity/importance_threshold": -0.002198480511315652, "compression/movement_sparsity/linear_layer_sparsity": 0.6710963399292105, "compression/movement_sparsity/model_sparsity": 0.6480421161188198, "compression_loss": 73.40009307861328, "distillation_loss": 3.388078212738037, "epoch": 2.96, "learning_rate": 3.909551986475064e-05, "loss": 76.2064, "step": 3505, "task_loss": 1.7386434078216553 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6864887754180904, "compression/movement_sparsity/importance_threshold": -0.002195749479936948, "compression/movement_sparsity/linear_layer_sparsity": 0.6715159036916459, "compression/movement_sparsity/model_sparsity": 0.6484472665752865, "compression_loss": 73.44144439697266, "distillation_loss": 2.8493294715881348, "epoch": 2.96, "learning_rate": 3.9090823706208324e-05, "loss": 76.7381, "step": 3506, "task_loss": 2.541344404220581 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6868783916426471, "compression/movement_sparsity/importance_threshold": -0.002193020711218743, "compression/movement_sparsity/linear_layer_sparsity": 0.6719584930217863, "compression/movement_sparsity/model_sparsity": 0.6488746516003725, "compression_loss": 73.4827651977539, "distillation_loss": 2.8282599449157715, "epoch": 2.96, "learning_rate": 3.908612754766601e-05, "loss": 76.5251, "step": 3507, "task_loss": 1.9676721096038818 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6872676849362445, "compression/movement_sparsity/importance_threshold": -0.002190294204223342, "compression/movement_sparsity/linear_layer_sparsity": 0.6723945479080622, "compression/movement_sparsity/model_sparsity": 0.649295726659843, "compression_loss": 73.52401733398438, "distillation_loss": 2.9349255561828613, "epoch": 2.96, "learning_rate": 3.90814313891237e-05, "loss": 76.374, "step": 3508, "task_loss": 1.529220461845398 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6876566554327679, "compression/movement_sparsity/importance_threshold": -0.002187569958013047, "compression/movement_sparsity/linear_layer_sparsity": 0.6728441009521019, "compression/movement_sparsity/model_sparsity": 0.6497298361738331, "compression_loss": 73.56525421142578, "distillation_loss": 2.720327377319336, "epoch": 2.97, "learning_rate": 3.907673523058138e-05, "loss": 76.1778, "step": 3509, "task_loss": 2.768578052520752 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6880453032661026, "compression/movement_sparsity/importance_threshold": -0.0021848479716501624, "compression/movement_sparsity/linear_layer_sparsity": 0.6732191756450879, "compression/movement_sparsity/model_sparsity": 0.6500920258972503, "compression_loss": 73.60649108886719, "distillation_loss": 4.758685111999512, "epoch": 2.97, "learning_rate": 3.9072039072039076e-05, "loss": 77.0673, "step": 3510, "task_loss": 3.797227382659912 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6884336285701332, "compression/movement_sparsity/importance_threshold": -0.0021821282441969944, "compression/movement_sparsity/linear_layer_sparsity": 0.6737286476314979, "compression/movement_sparsity/model_sparsity": 0.6505839959536079, "compression_loss": 73.64765930175781, "distillation_loss": 2.2860989570617676, "epoch": 2.97, "learning_rate": 3.906734291349676e-05, "loss": 76.3011, "step": 3511, "task_loss": 3.141240358352661 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6888216314787456, "compression/movement_sparsity/importance_threshold": -0.0021794107747158432, "compression/movement_sparsity/linear_layer_sparsity": 0.6742102170656398, "compression/movement_sparsity/model_sparsity": 0.6510490219962062, "compression_loss": 73.68878173828125, "distillation_loss": 2.670865535736084, "epoch": 2.97, "learning_rate": 3.906264675495445e-05, "loss": 76.0113, "step": 3512, "task_loss": 1.9518189430236816 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6892093121258241, "compression/movement_sparsity/importance_threshold": -0.002176695562269018, "compression/movement_sparsity/linear_layer_sparsity": 0.6746543088409023, "compression/movement_sparsity/model_sparsity": 0.6514778578528023, "compression_loss": 73.72982025146484, "distillation_loss": 3.69225811958313, "epoch": 2.97, "learning_rate": 3.9057950596412135e-05, "loss": 76.7323, "step": 3513, "task_loss": 2.1744210720062256 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6895966706452548, "compression/movement_sparsity/importance_threshold": -0.0021739826059188165, "compression/movement_sparsity/linear_layer_sparsity": 0.6749810668066277, "compression/movement_sparsity/model_sparsity": 0.6517933906771801, "compression_loss": 73.77082824707031, "distillation_loss": 4.02522087097168, "epoch": 2.97, "learning_rate": 3.905325443786982e-05, "loss": 76.1552, "step": 3514, "task_loss": 2.1607179641723633 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6899837071709218, "compression/movement_sparsity/importance_threshold": -0.0021712719047275487, "compression/movement_sparsity/linear_layer_sparsity": 0.6754061514586787, "compression/movement_sparsity/model_sparsity": 0.6522038723637197, "compression_loss": 73.81180572509766, "distillation_loss": 1.8403013944625854, "epoch": 2.97, "learning_rate": 3.9048558279327515e-05, "loss": 76.755, "step": 3515, "task_loss": 1.2503207921981812 }, { "compression/movement_sparsity/importance_regularization_factor": 0.690370421836711, "compression/movement_sparsity/importance_threshold": -0.0021685634577575143, "compression/movement_sparsity/linear_layer_sparsity": 0.6759023995431803, "compression/movement_sparsity/model_sparsity": 0.6526830727998811, "compression_loss": 73.85281372070312, "distillation_loss": 4.190285682678223, "epoch": 2.97, "learning_rate": 3.90438621207852e-05, "loss": 76.5528, "step": 3516, "task_loss": 2.2722229957580566 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6907568147765073, "compression/movement_sparsity/importance_threshold": -0.0021658572640710187, "compression/movement_sparsity/linear_layer_sparsity": 0.6763273888018901, "compression/movement_sparsity/model_sparsity": 0.6530934623701343, "compression_loss": 73.89366149902344, "distillation_loss": 2.6246254444122314, "epoch": 2.97, "learning_rate": 3.903916596224289e-05, "loss": 76.7156, "step": 3517, "task_loss": 1.2652907371520996 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6911428861241957, "compression/movement_sparsity/importance_threshold": -0.002163153322730367, "compression/movement_sparsity/linear_layer_sparsity": 0.6766816677465193, "compression/movement_sparsity/model_sparsity": 0.653435570743126, "compression_loss": 73.93456268310547, "distillation_loss": 2.761120557785034, "epoch": 2.97, "learning_rate": 3.9034469803700574e-05, "loss": 76.4856, "step": 3518, "task_loss": 1.4737898111343384 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6915286360136617, "compression/movement_sparsity/importance_threshold": -0.002160451632797861, "compression/movement_sparsity/linear_layer_sparsity": 0.6769343885553938, "compression/movement_sparsity/model_sparsity": 0.6536796098147554, "compression_loss": 73.9753646850586, "distillation_loss": 2.7445590496063232, "epoch": 2.97, "learning_rate": 3.902977364515826e-05, "loss": 77.0944, "step": 3519, "task_loss": 1.4786510467529297 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6919140645787898, "compression/movement_sparsity/importance_threshold": -0.002157752193335808, "compression/movement_sparsity/linear_layer_sparsity": 0.6774305293223868, "compression/movement_sparsity/model_sparsity": 0.6541587066200947, "compression_loss": 74.01618194580078, "distillation_loss": 2.495396614074707, "epoch": 2.97, "learning_rate": 3.902507748661595e-05, "loss": 77.1416, "step": 3520, "task_loss": 1.097321629524231 }, { "compression/movement_sparsity/importance_regularization_factor": 0.692299171953466, "compression/movement_sparsity/importance_threshold": -0.002155055003406507, "compression/movement_sparsity/linear_layer_sparsity": 0.6778166815671065, "compression/movement_sparsity/model_sparsity": 0.6545315933472651, "compression_loss": 74.05694580078125, "distillation_loss": 1.8660991191864014, "epoch": 2.98, "learning_rate": 3.902038132807363e-05, "loss": 76.3922, "step": 3521, "task_loss": 1.5311673879623413 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6926839582715747, "compression/movement_sparsity/importance_threshold": -0.0021523600620722673, "compression/movement_sparsity/linear_layer_sparsity": 0.6782045389677983, "compression/movement_sparsity/model_sparsity": 0.6549061266530541, "compression_loss": 74.0976791381836, "distillation_loss": 2.4687018394470215, "epoch": 2.98, "learning_rate": 3.9015685169531326e-05, "loss": 76.2609, "step": 3522, "task_loss": 1.1037546396255493 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6930684236670016, "compression/movement_sparsity/importance_threshold": -0.002149667368395388, "compression/movement_sparsity/linear_layer_sparsity": 0.6787277356711571, "compression/movement_sparsity/model_sparsity": 0.6554113499401112, "compression_loss": 74.1384048461914, "distillation_loss": 3.080456018447876, "epoch": 2.98, "learning_rate": 3.901098901098901e-05, "loss": 77.0713, "step": 3523, "task_loss": 1.8184940814971924 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6934525682736313, "compression/movement_sparsity/importance_threshold": -0.0021469769214381767, "compression/movement_sparsity/linear_layer_sparsity": 0.6791918600480478, "compression/movement_sparsity/model_sparsity": 0.6558595302168421, "compression_loss": 74.17906951904297, "distillation_loss": 5.344025611877441, "epoch": 2.98, "learning_rate": 3.9006292852446705e-05, "loss": 77.215, "step": 3524, "task_loss": 2.8443946838378906 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6938363922253495, "compression/movement_sparsity/importance_threshold": -0.0021442887202629355, "compression/movement_sparsity/linear_layer_sparsity": 0.6795479753144927, "compression/movement_sparsity/model_sparsity": 0.6562034118283461, "compression_loss": 74.21961212158203, "distillation_loss": 2.9555575847625732, "epoch": 2.98, "learning_rate": 3.9001596693904385e-05, "loss": 77.0161, "step": 3525, "task_loss": 1.3013581037521362 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6942198956560408, "compression/movement_sparsity/importance_threshold": -0.00214160276393197, "compression/movement_sparsity/linear_layer_sparsity": 0.6798631310651087, "compression/movement_sparsity/model_sparsity": 0.6565077410093958, "compression_loss": 74.26028442382812, "distillation_loss": 3.1672213077545166, "epoch": 2.98, "learning_rate": 3.899690053536207e-05, "loss": 77.5017, "step": 3526, "task_loss": 1.9107997417449951 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6946030786995907, "compression/movement_sparsity/importance_threshold": -0.0021389190515075825, "compression/movement_sparsity/linear_layer_sparsity": 0.680318884676319, "compression/movement_sparsity/model_sparsity": 0.6569478380819989, "compression_loss": 74.30083465576172, "distillation_loss": 4.137566566467285, "epoch": 2.98, "learning_rate": 3.8992204376819764e-05, "loss": 77.5431, "step": 3527, "task_loss": 2.018787145614624 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6949859414898842, "compression/movement_sparsity/importance_threshold": -0.0021362375820520784, "compression/movement_sparsity/linear_layer_sparsity": 0.6807098185122608, "compression/movement_sparsity/model_sparsity": 0.657325342138023, "compression_loss": 74.34141540527344, "distillation_loss": 2.014414072036743, "epoch": 2.98, "learning_rate": 3.898750821827745e-05, "loss": 77.1238, "step": 3528, "task_loss": 1.3450452089309692 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6953684841608067, "compression/movement_sparsity/importance_threshold": -0.0021335583546277593, "compression/movement_sparsity/linear_layer_sparsity": 0.6812058877342478, "compression/movement_sparsity/model_sparsity": 0.6578043698561474, "compression_loss": 74.3819351196289, "distillation_loss": 2.682356834411621, "epoch": 2.98, "learning_rate": 3.8982812059735144e-05, "loss": 77.3362, "step": 3529, "task_loss": 0.8740662336349487 }, { "compression/movement_sparsity/importance_regularization_factor": 0.695750706846243, "compression/movement_sparsity/importance_threshold": -0.0021308813682969314, "compression/movement_sparsity/linear_layer_sparsity": 0.681595891485114, "compression/movement_sparsity/model_sparsity": 0.6581809757783794, "compression_loss": 74.42241668701172, "distillation_loss": 3.363556385040283, "epoch": 2.98, "learning_rate": 3.897811590119282e-05, "loss": 77.653, "step": 3530, "task_loss": 2.9099674224853516 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6961326096800781, "compression/movement_sparsity/importance_threshold": -0.0021282066221219005, "compression/movement_sparsity/linear_layer_sparsity": 0.6821610016377129, "compression/movement_sparsity/model_sparsity": 0.6587266726587543, "compression_loss": 74.46282958984375, "distillation_loss": 2.7154297828674316, "epoch": 2.98, "learning_rate": 3.8973419742650516e-05, "loss": 77.5275, "step": 3531, "task_loss": 1.8898917436599731 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6965141927961978, "compression/movement_sparsity/importance_threshold": -0.0021255341151649645, "compression/movement_sparsity/linear_layer_sparsity": 0.6825992863433367, "compression/movement_sparsity/model_sparsity": 0.6591499009364185, "compression_loss": 74.5032730102539, "distillation_loss": 3.3017425537109375, "epoch": 2.99, "learning_rate": 3.89687235841082e-05, "loss": 77.5874, "step": 3532, "task_loss": 1.4407390356063843 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6968954563284865, "compression/movement_sparsity/importance_threshold": -0.002122863846488434, "compression/movement_sparsity/linear_layer_sparsity": 0.6830630410710307, "compression/movement_sparsity/model_sparsity": 0.6595977242625397, "compression_loss": 74.5436019897461, "distillation_loss": 2.367194175720215, "epoch": 2.99, "learning_rate": 3.896402742556589e-05, "loss": 77.7123, "step": 3533, "task_loss": 1.6369836330413818 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6972764004108299, "compression/movement_sparsity/importance_threshold": -0.002120195815154608, "compression/movement_sparsity/linear_layer_sparsity": 0.6834213980809912, "compression/movement_sparsity/model_sparsity": 0.6599437706067731, "compression_loss": 74.58390808105469, "distillation_loss": 4.44533109664917, "epoch": 2.99, "learning_rate": 3.8959331267023575e-05, "loss": 78.5384, "step": 3534, "task_loss": 2.429835796356201 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6976570251771128, "compression/movement_sparsity/importance_threshold": -0.0021175300202257935, "compression/movement_sparsity/linear_layer_sparsity": 0.6838346778070826, "compression/movement_sparsity/model_sparsity": 0.6603428529028761, "compression_loss": 74.62417602539062, "distillation_loss": 3.7314069271087646, "epoch": 2.99, "learning_rate": 3.895463510848126e-05, "loss": 77.8136, "step": 3535, "task_loss": 2.0037572383880615 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6980373307612205, "compression/movement_sparsity/importance_threshold": -0.002114866460764294, "compression/movement_sparsity/linear_layer_sparsity": 0.6842380604740362, "compression/movement_sparsity/model_sparsity": 0.6607323781342695, "compression_loss": 74.66445922851562, "distillation_loss": 2.8498382568359375, "epoch": 2.99, "learning_rate": 3.8949938949938955e-05, "loss": 77.5993, "step": 3536, "task_loss": 1.7319574356079102 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6984173172970382, "compression/movement_sparsity/importance_threshold": -0.002112205135832412, "compression/movement_sparsity/linear_layer_sparsity": 0.6846356479955188, "compression/movement_sparsity/model_sparsity": 0.661116307301267, "compression_loss": 74.70462799072266, "distillation_loss": 3.473423480987549, "epoch": 2.99, "learning_rate": 3.894524279139664e-05, "loss": 77.6012, "step": 3537, "task_loss": 2.665347099304199 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6987969849184509, "compression/movement_sparsity/importance_threshold": -0.0021095460444924526, "compression/movement_sparsity/linear_layer_sparsity": 0.6851353302402996, "compression/movement_sparsity/model_sparsity": 0.6615988239237373, "compression_loss": 74.74482727050781, "distillation_loss": 2.902806520462036, "epoch": 2.99, "learning_rate": 3.894054663285433e-05, "loss": 77.6592, "step": 3538, "task_loss": 1.6976743936538696 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6991763337593437, "compression/movement_sparsity/importance_threshold": -0.0021068891858067196, "compression/movement_sparsity/linear_layer_sparsity": 0.685689649021188, "compression/movement_sparsity/model_sparsity": 0.662134100149218, "compression_loss": 74.78495788574219, "distillation_loss": 3.601815700531006, "epoch": 2.99, "learning_rate": 3.8935850474312014e-05, "loss": 77.7685, "step": 3539, "task_loss": 2.4926445484161377 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6995553639536018, "compression/movement_sparsity/importance_threshold": -0.002104234558837518, "compression/movement_sparsity/linear_layer_sparsity": 0.6861877095791704, "compression/movement_sparsity/model_sparsity": 0.6626150507948202, "compression_loss": 74.8250732421875, "distillation_loss": 4.3156538009643555, "epoch": 2.99, "learning_rate": 3.89311543157697e-05, "loss": 78.3369, "step": 3540, "task_loss": 2.5373926162719727 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6999340756351107, "compression/movement_sparsity/importance_threshold": -0.0021015821626471486, "compression/movement_sparsity/linear_layer_sparsity": 0.6866432604795308, "compression/movement_sparsity/model_sparsity": 0.6630549521203148, "compression_loss": 74.86515045166016, "distillation_loss": 3.8798952102661133, "epoch": 2.99, "learning_rate": 3.892645815722739e-05, "loss": 78.1098, "step": 3541, "task_loss": 2.56223201751709 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7003124689377547, "compression/movement_sparsity/importance_threshold": -0.002098931996297921, "compression/movement_sparsity/linear_layer_sparsity": 0.6870088912317494, "compression/movement_sparsity/model_sparsity": 0.6634080223313829, "compression_loss": 74.9051742553711, "distillation_loss": 3.5563862323760986, "epoch": 2.99, "learning_rate": 3.892176199868508e-05, "loss": 78.3024, "step": 3542, "task_loss": 2.727679491043091 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7006905439954199, "compression/movement_sparsity/importance_threshold": -0.002096284058852132, "compression/movement_sparsity/linear_layer_sparsity": 0.6875650582586212, "compression/movement_sparsity/model_sparsity": 0.6639450833099118, "compression_loss": 74.94525909423828, "distillation_loss": 4.539097785949707, "epoch": 2.99, "learning_rate": 3.8917065840142766e-05, "loss": 77.5837, "step": 3543, "task_loss": 2.4861903190612793 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7010683009419907, "compression/movement_sparsity/importance_threshold": -0.0020936383493720912, "compression/movement_sparsity/linear_layer_sparsity": 0.6878605629809732, "compression/movement_sparsity/model_sparsity": 0.6642304365359721, "compression_loss": 74.9852294921875, "distillation_loss": 2.2521543502807617, "epoch": 3.0, "learning_rate": 3.891236968160045e-05, "loss": 77.7262, "step": 3544, "task_loss": 1.2192625999450684 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7014457399113527, "compression/movement_sparsity/importance_threshold": -0.0020909948669200996, "compression/movement_sparsity/linear_layer_sparsity": 0.6883322472801451, "compression/movement_sparsity/model_sparsity": 0.6646859170283967, "compression_loss": 75.02513122558594, "distillation_loss": 3.061636209487915, "epoch": 3.0, "learning_rate": 3.890767352305814e-05, "loss": 77.8963, "step": 3545, "task_loss": 1.07033371925354 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7018228610373907, "compression/movement_sparsity/importance_threshold": -0.002088353610558465, "compression/movement_sparsity/linear_layer_sparsity": 0.6886782389282673, "compression/movement_sparsity/model_sparsity": 0.6650200227990112, "compression_loss": 75.06502532958984, "distillation_loss": 3.494990825653076, "epoch": 3.0, "learning_rate": 3.890297736451583e-05, "loss": 77.8407, "step": 3546, "task_loss": 1.8421080112457275 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7021996644539901, "compression/movement_sparsity/importance_threshold": -0.0020857145793494866, "compression/movement_sparsity/linear_layer_sparsity": 0.6890669667931963, "compression/movement_sparsity/model_sparsity": 0.6653953966659132, "compression_loss": 75.10491943359375, "distillation_loss": 3.1889877319335938, "epoch": 3.0, "learning_rate": 3.889828120597351e-05, "loss": 78.1745, "step": 3547, "task_loss": 1.9380377531051636 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7025761502950361, "compression/movement_sparsity/importance_threshold": -0.0020830777723554695, "compression/movement_sparsity/linear_layer_sparsity": 0.6895915347758332, "compression/movement_sparsity/model_sparsity": 0.6659019441245866, "compression_loss": 75.144775390625, "distillation_loss": 2.3045902252197266, "epoch": 3.0, "learning_rate": 3.8893585047431204e-05, "loss": 77.965, "step": 3548, "task_loss": 1.477068543434143 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7029523186944135, "compression/movement_sparsity/importance_threshold": -0.0020804431886387196, "compression/movement_sparsity/linear_layer_sparsity": 0.6900735692525131, "compression/movement_sparsity/model_sparsity": 0.6663674192340809, "compression_loss": 75.1845474243164, "distillation_loss": 3.7252001762390137, "epoch": 3.0, "learning_rate": 3.888888888888889e-05, "loss": 78.5459, "step": 3549, "task_loss": 2.7672505378723145 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7037037037037036, "compression/movement_sparsity/importance_threshold": -0.002075180687286236, "compression/movement_sparsity/linear_layer_sparsity": 0.6908862370494, "compression/movement_sparsity/model_sparsity": 0.6671521693920863, "compression_loss": 75.26439666748047, "distillation_loss": 4.037415504455566, "epoch": 3.0, "learning_rate": 3.888419273034658e-05, "loss": 137.5458, "step": 3550, "task_loss": 2.530160665512085 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7040789205813869, "compression/movement_sparsity/importance_threshold": -0.0020725527677751075, "compression/movement_sparsity/linear_layer_sparsity": 0.691168804049867, "compression/movement_sparsity/model_sparsity": 0.6674250293468095, "compression_loss": 75.30401611328125, "distillation_loss": 2.2475407123565674, "epoch": 3.0, "learning_rate": 3.8879496571804263e-05, "loss": 78.3594, "step": 3551, "task_loss": 1.0853458642959595 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7044538205529419, "compression/movement_sparsity/importance_threshold": -0.0020699270677904643, "compression/movement_sparsity/linear_layer_sparsity": 0.6916943021175795, "compression/movement_sparsity/model_sparsity": 0.6679324749392749, "compression_loss": 75.34367370605469, "distillation_loss": 2.9350528717041016, "epoch": 3.0, "learning_rate": 3.887480041326195e-05, "loss": 78.3286, "step": 3552, "task_loss": 1.3534562587738037 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7048284037522546, "compression/movement_sparsity/importance_threshold": -0.0020673035863946034, "compression/movement_sparsity/linear_layer_sparsity": 0.6921543961258093, "compression/movement_sparsity/model_sparsity": 0.6683767633029073, "compression_loss": 75.38325500488281, "distillation_loss": 2.7520534992218018, "epoch": 3.0, "learning_rate": 3.887010425471964e-05, "loss": 78.2598, "step": 3553, "task_loss": 1.9941890239715576 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7052026703132095, "compression/movement_sparsity/importance_threshold": -0.0020646823226498356, "compression/movement_sparsity/linear_layer_sparsity": 0.6925586134844974, "compression/movement_sparsity/model_sparsity": 0.6687670945518064, "compression_loss": 75.42284393310547, "distillation_loss": 4.449623107910156, "epoch": 3.0, "learning_rate": 3.886540809617733e-05, "loss": 79.0145, "step": 3554, "task_loss": 2.9505841732025146 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7055766203696922, "compression/movement_sparsity/importance_threshold": -0.0020620632756184597, "compression/movement_sparsity/linear_layer_sparsity": 0.6928542374485257, "compression/movement_sparsity/model_sparsity": 0.6690525629232247, "compression_loss": 75.46237182617188, "distillation_loss": 2.5959811210632324, "epoch": 3.01, "learning_rate": 3.8860711937635016e-05, "loss": 78.5603, "step": 3555, "task_loss": 1.5232499837875366 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7059502540555873, "compression/movement_sparsity/importance_threshold": -0.002059446444362784, "compression/movement_sparsity/linear_layer_sparsity": 0.6933199477397121, "compression/movement_sparsity/model_sparsity": 0.6695022746332162, "compression_loss": 75.50188446044922, "distillation_loss": 2.5966243743896484, "epoch": 3.01, "learning_rate": 3.88560157790927e-05, "loss": 78.023, "step": 3556, "task_loss": 2.0346181392669678 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7063235715047802, "compression/movement_sparsity/importance_threshold": -0.002056831827945111, "compression/movement_sparsity/linear_layer_sparsity": 0.6938405092020232, "compression/movement_sparsity/model_sparsity": 0.6700049532078627, "compression_loss": 75.54131317138672, "distillation_loss": 2.037461996078491, "epoch": 3.01, "learning_rate": 3.885131962055039e-05, "loss": 77.7502, "step": 3557, "task_loss": 1.2248094081878662 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7066965728511565, "compression/movement_sparsity/importance_threshold": -0.002054219425427741, "compression/movement_sparsity/linear_layer_sparsity": 0.6942760394249231, "compression/movement_sparsity/model_sparsity": 0.6704255216277584, "compression_loss": 75.58076477050781, "distillation_loss": 5.4873504638671875, "epoch": 3.01, "learning_rate": 3.884662346200808e-05, "loss": 80.0207, "step": 3558, "task_loss": 4.12951135635376 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7070692582286007, "compression/movement_sparsity/importance_threshold": -0.002051609235872983, "compression/movement_sparsity/linear_layer_sparsity": 0.6946422663855234, "compression/movement_sparsity/model_sparsity": 0.6707791675656161, "compression_loss": 75.62012481689453, "distillation_loss": 2.0779519081115723, "epoch": 3.01, "learning_rate": 3.884192730346577e-05, "loss": 78.8415, "step": 3559, "task_loss": 0.9649397134780884 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7074416277709983, "compression/movement_sparsity/importance_threshold": -0.0020490012583431376, "compression/movement_sparsity/linear_layer_sparsity": 0.6950915213253722, "compression/movement_sparsity/model_sparsity": 0.6712129892162112, "compression_loss": 75.65950775146484, "distillation_loss": 4.026408672332764, "epoch": 3.01, "learning_rate": 3.8837231144923454e-05, "loss": 78.7413, "step": 3560, "task_loss": 2.7202627658843994 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7078136816122341, "compression/movement_sparsity/importance_threshold": -0.002046395491900511, "compression/movement_sparsity/linear_layer_sparsity": 0.6954925310829663, "compression/movement_sparsity/model_sparsity": 0.6716002230549816, "compression_loss": 75.69883728027344, "distillation_loss": 3.7912116050720215, "epoch": 3.01, "learning_rate": 3.883253498638114e-05, "loss": 78.7645, "step": 3561, "task_loss": 2.6888856887817383 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7081854198861938, "compression/movement_sparsity/importance_threshold": -0.0020437919356074054, "compression/movement_sparsity/linear_layer_sparsity": 0.6959806588093079, "compression/movement_sparsity/model_sparsity": 0.6720715820922669, "compression_loss": 75.73812103271484, "distillation_loss": 3.135347843170166, "epoch": 3.01, "learning_rate": 3.8827838827838833e-05, "loss": 78.8768, "step": 3562, "task_loss": 2.4276068210601807 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7085568427267619, "compression/movement_sparsity/importance_threshold": -0.002041190588526127, "compression/movement_sparsity/linear_layer_sparsity": 0.6963649151113737, "compression/movement_sparsity/model_sparsity": 0.672442638008246, "compression_loss": 75.77738189697266, "distillation_loss": 2.9291934967041016, "epoch": 3.01, "learning_rate": 3.882314266929652e-05, "loss": 79.1526, "step": 3563, "task_loss": 2.239157199859619 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7089279502678241, "compression/movement_sparsity/importance_threshold": -0.0020385914497189767, "compression/movement_sparsity/linear_layer_sparsity": 0.696942486019152, "compression/movement_sparsity/model_sparsity": 0.6730003675785262, "compression_loss": 75.8166275024414, "distillation_loss": 2.756098747253418, "epoch": 3.01, "learning_rate": 3.8818446510754206e-05, "loss": 78.8098, "step": 3564, "task_loss": 1.2386481761932373 }, { "compression/movement_sparsity/importance_regularization_factor": 0.709298742643265, "compression/movement_sparsity/importance_threshold": -0.002035994518248262, "compression/movement_sparsity/linear_layer_sparsity": 0.6972778293855753, "compression/movement_sparsity/model_sparsity": 0.673324190868676, "compression_loss": 75.85584259033203, "distillation_loss": 2.5799355506896973, "epoch": 3.01, "learning_rate": 3.881375035221189e-05, "loss": 78.6244, "step": 3565, "task_loss": 1.2727571725845337 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7096692199869703, "compression/movement_sparsity/importance_threshold": -0.002033399793176284, "compression/movement_sparsity/linear_layer_sparsity": 0.6975952030313715, "compression/movement_sparsity/model_sparsity": 0.6736306617533835, "compression_loss": 75.89501190185547, "distillation_loss": 3.2921628952026367, "epoch": 3.01, "learning_rate": 3.880905419366958e-05, "loss": 79.2981, "step": 3566, "task_loss": 2.56274151802063 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7100393824328246, "compression/movement_sparsity/importance_threshold": -0.002030807273565349, "compression/movement_sparsity/linear_layer_sparsity": 0.6979746896663828, "compression/movement_sparsity/model_sparsity": 0.6739971118550449, "compression_loss": 75.93416595458984, "distillation_loss": 3.6278750896453857, "epoch": 3.02, "learning_rate": 3.880435803512727e-05, "loss": 79.2073, "step": 3567, "task_loss": 1.7580965757369995 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7104092301147134, "compression/movement_sparsity/importance_threshold": -0.00202821695847776, "compression/movement_sparsity/linear_layer_sparsity": 0.6984193538016917, "compression/movement_sparsity/model_sparsity": 0.674426500409359, "compression_loss": 75.97332763671875, "distillation_loss": 2.698019504547119, "epoch": 3.02, "learning_rate": 3.879966187658495e-05, "loss": 78.838, "step": 3568, "task_loss": 3.2207140922546387 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7107787631665219, "compression/movement_sparsity/importance_threshold": -0.002025628846975819, "compression/movement_sparsity/linear_layer_sparsity": 0.6988625631885491, "compression/movement_sparsity/model_sparsity": 0.6748544841903064, "compression_loss": 76.01234436035156, "distillation_loss": 3.7371256351470947, "epoch": 3.02, "learning_rate": 3.8794965718042645e-05, "loss": 79.2318, "step": 3569, "task_loss": 1.8398133516311646 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7111479817221347, "compression/movement_sparsity/importance_threshold": -0.0020230429381218346, "compression/movement_sparsity/linear_layer_sparsity": 0.6993459093236688, "compression/movement_sparsity/model_sparsity": 0.675321225898738, "compression_loss": 76.05135345458984, "distillation_loss": 3.2853474617004395, "epoch": 3.02, "learning_rate": 3.879026955950033e-05, "loss": 78.8324, "step": 3570, "task_loss": 2.6696784496307373 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7115168859154377, "compression/movement_sparsity/importance_threshold": -0.002020459230978105, "compression/movement_sparsity/linear_layer_sparsity": 0.6998391286695911, "compression/movement_sparsity/model_sparsity": 0.6757975016428076, "compression_loss": 76.09036254882812, "distillation_loss": 3.316774606704712, "epoch": 3.02, "learning_rate": 3.878557340095802e-05, "loss": 79.1839, "step": 3571, "task_loss": 2.796229600906372 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7118854758803155, "compression/movement_sparsity/importance_threshold": -0.002017877724606938, "compression/movement_sparsity/linear_layer_sparsity": 0.7000861735746708, "compression/movement_sparsity/model_sparsity": 0.6760360597953989, "compression_loss": 76.1292953491211, "distillation_loss": 2.4558284282684326, "epoch": 3.02, "learning_rate": 3.878087724241571e-05, "loss": 78.8733, "step": 3572, "task_loss": 2.29618501663208 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7122537517506536, "compression/movement_sparsity/importance_threshold": -0.002015298418070636, "compression/movement_sparsity/linear_layer_sparsity": 0.7005630568109319, "compression/movement_sparsity/model_sparsity": 0.67649656062543, "compression_loss": 76.16820526123047, "distillation_loss": 3.788773775100708, "epoch": 3.02, "learning_rate": 3.877618108387339e-05, "loss": 78.9283, "step": 3573, "task_loss": 3.48488450050354 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7126217136603368, "compression/movement_sparsity/importance_threshold": -0.0020127213104315046, "compression/movement_sparsity/linear_layer_sparsity": 0.701031557357345, "compression/movement_sparsity/model_sparsity": 0.6769489667367975, "compression_loss": 76.2071304321289, "distillation_loss": 2.7483935356140137, "epoch": 3.02, "learning_rate": 3.877148492533108e-05, "loss": 78.5841, "step": 3574, "task_loss": 1.562520146369934 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7129893617432504, "compression/movement_sparsity/importance_threshold": -0.0020101464007518466, "compression/movement_sparsity/linear_layer_sparsity": 0.7013984282229976, "compression/movement_sparsity/model_sparsity": 0.6773032344595881, "compression_loss": 76.24596405029297, "distillation_loss": 2.970107316970825, "epoch": 3.02, "learning_rate": 3.876678876678877e-05, "loss": 79.2099, "step": 3575, "task_loss": 2.3123908042907715 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7133566961332795, "compression/movement_sparsity/importance_threshold": -0.0020075736880939676, "compression/movement_sparsity/linear_layer_sparsity": 0.7019223403764145, "compression/movement_sparsity/model_sparsity": 0.6778091486187928, "compression_loss": 76.28477478027344, "distillation_loss": 4.013206481933594, "epoch": 3.02, "learning_rate": 3.8762092608246456e-05, "loss": 79.3435, "step": 3576, "task_loss": 3.422053098678589 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7137237169643091, "compression/movement_sparsity/importance_threshold": -0.0020050031715201696, "compression/movement_sparsity/linear_layer_sparsity": 0.7024589875408666, "compression/movement_sparsity/model_sparsity": 0.6783273603022262, "compression_loss": 76.32354736328125, "distillation_loss": 2.5563859939575195, "epoch": 3.02, "learning_rate": 3.875739644970414e-05, "loss": 79.1602, "step": 3577, "task_loss": 0.7791175842285156 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7140904243702249, "compression/movement_sparsity/importance_threshold": -0.0020024348500927557, "compression/movement_sparsity/linear_layer_sparsity": 0.7028259418756926, "compression/movement_sparsity/model_sparsity": 0.6786817086267674, "compression_loss": 76.36231994628906, "distillation_loss": 3.432677745819092, "epoch": 3.02, "learning_rate": 3.875270029116183e-05, "loss": 79.0639, "step": 3578, "task_loss": 1.0367989540100098 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7144568184849114, "compression/movement_sparsity/importance_threshold": -0.0019998687228740325, "compression/movement_sparsity/linear_layer_sparsity": 0.7033268284614046, "compression/movement_sparsity/model_sparsity": 0.6791653882173528, "compression_loss": 76.40101623535156, "distillation_loss": 3.6220920085906982, "epoch": 3.03, "learning_rate": 3.874800413261952e-05, "loss": 79.4855, "step": 3579, "task_loss": 3.0541529655456543 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7148228994422541, "compression/movement_sparsity/importance_threshold": -0.001997304788926302, "compression/movement_sparsity/linear_layer_sparsity": 0.7038418451857652, "compression/movement_sparsity/model_sparsity": 0.6796627125328548, "compression_loss": 76.4397201538086, "distillation_loss": 3.091390609741211, "epoch": 3.03, "learning_rate": 3.874330797407721e-05, "loss": 79.6463, "step": 3580, "task_loss": 1.9463034868240356 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7151886673761378, "compression/movement_sparsity/importance_threshold": -0.0019947430473118708, "compression/movement_sparsity/linear_layer_sparsity": 0.7041304577392236, "compression/movement_sparsity/model_sparsity": 0.6799414103572259, "compression_loss": 76.47840881347656, "distillation_loss": 3.544201374053955, "epoch": 3.03, "learning_rate": 3.8738611815534894e-05, "loss": 79.5556, "step": 3581, "task_loss": 2.1705336570739746 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7155541224204482, "compression/movement_sparsity/importance_threshold": -0.0019921834970930384, "compression/movement_sparsity/linear_layer_sparsity": 0.7045039465179141, "compression/movement_sparsity/model_sparsity": 0.6803020686473825, "compression_loss": 76.5170669555664, "distillation_loss": 3.277132511138916, "epoch": 3.03, "learning_rate": 3.873391565699258e-05, "loss": 79.3831, "step": 3582, "task_loss": 1.6507868766784668 }, { "compression/movement_sparsity/importance_regularization_factor": 0.71591926470907, "compression/movement_sparsity/importance_threshold": -0.001989626137332113, "compression/movement_sparsity/linear_layer_sparsity": 0.7049047535646583, "compression/movement_sparsity/model_sparsity": 0.6806891067390445, "compression_loss": 76.55569458007812, "distillation_loss": 4.039057731628418, "epoch": 3.03, "learning_rate": 3.872921949845027e-05, "loss": 80.0292, "step": 3583, "task_loss": 2.9238855838775635 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7162840943758886, "compression/movement_sparsity/importance_threshold": -0.001987070967091396, "compression/movement_sparsity/linear_layer_sparsity": 0.7052450573848182, "compression/movement_sparsity/model_sparsity": 0.6810177200760849, "compression_loss": 76.59428405761719, "distillation_loss": 5.368074893951416, "epoch": 3.03, "learning_rate": 3.872452333990796e-05, "loss": 79.6794, "step": 3584, "task_loss": 2.2665061950683594 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7166486115547885, "compression/movement_sparsity/importance_threshold": -0.001984517985433195, "compression/movement_sparsity/linear_layer_sparsity": 0.7057160977789377, "compression/movement_sparsity/model_sparsity": 0.6814725787835766, "compression_loss": 76.63282775878906, "distillation_loss": 3.9251279830932617, "epoch": 3.03, "learning_rate": 3.871982718136564e-05, "loss": 80.2899, "step": 3585, "task_loss": 2.7647080421447754 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7170128163796559, "compression/movement_sparsity/importance_threshold": -0.001981967191419809, "compression/movement_sparsity/linear_layer_sparsity": 0.7061616919993223, "compression/movement_sparsity/model_sparsity": 0.6819028654716828, "compression_loss": 76.67135620117188, "distillation_loss": 4.1690239906311035, "epoch": 3.03, "learning_rate": 3.871513102282333e-05, "loss": 79.3071, "step": 3586, "task_loss": 2.2057766914367676 }, { "compression/movement_sparsity/importance_regularization_factor": 0.717376708984375, "compression/movement_sparsity/importance_threshold": -0.001979418584113546, "compression/movement_sparsity/linear_layer_sparsity": 0.7067076877112008, "compression/movement_sparsity/model_sparsity": 0.682430104551179, "compression_loss": 76.70980072021484, "distillation_loss": 1.8608615398406982, "epoch": 3.03, "learning_rate": 3.871043486428102e-05, "loss": 79.5247, "step": 3587, "task_loss": 2.71101975440979 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7177402895028313, "compression/movement_sparsity/importance_threshold": -0.0019768721625767083, "compression/movement_sparsity/linear_layer_sparsity": 0.7071375062578031, "compression/movement_sparsity/model_sparsity": 0.682845157508429, "compression_loss": 76.74826049804688, "distillation_loss": 1.9667669534683228, "epoch": 3.03, "learning_rate": 3.870573870573871e-05, "loss": 79.6373, "step": 3588, "task_loss": 1.2972990274429321 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7181035580689104, "compression/movement_sparsity/importance_threshold": -0.001974327925871598, "compression/movement_sparsity/linear_layer_sparsity": 0.7073807592775748, "compression/movement_sparsity/model_sparsity": 0.6830800540386377, "compression_loss": 76.7866439819336, "distillation_loss": 2.801029920578003, "epoch": 3.03, "learning_rate": 3.87010425471964e-05, "loss": 79.4876, "step": 3589, "task_loss": 1.4843177795410156 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7184665148164965, "compression/movement_sparsity/importance_threshold": -0.0019717858730605234, "compression/movement_sparsity/linear_layer_sparsity": 0.7078120564209639, "compression/movement_sparsity/model_sparsity": 0.6834965347983262, "compression_loss": 76.82508850097656, "distillation_loss": 2.6059072017669678, "epoch": 3.03, "learning_rate": 3.869634638865408e-05, "loss": 79.6378, "step": 3590, "task_loss": 1.5625333786010742 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7188291598794756, "compression/movement_sparsity/importance_threshold": -0.0019692460032057837, "compression/movement_sparsity/linear_layer_sparsity": 0.7081871430381177, "compression/movement_sparsity/model_sparsity": 0.6838587360362792, "compression_loss": 76.86338806152344, "distillation_loss": 3.4056460857391357, "epoch": 3.04, "learning_rate": 3.869165023011177e-05, "loss": 80.0982, "step": 3591, "task_loss": 2.65210223197937 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7191914933917322, "compression/movement_sparsity/importance_threshold": -0.001966708315369688, "compression/movement_sparsity/linear_layer_sparsity": 0.7086996556872747, "compression/movement_sparsity/model_sparsity": 0.6843536422992644, "compression_loss": 76.90164184570312, "distillation_loss": 3.5913431644439697, "epoch": 3.04, "learning_rate": 3.868695407156946e-05, "loss": 80.0666, "step": 3592, "task_loss": 2.770752191543579 }, { "compression/movement_sparsity/importance_regularization_factor": 0.719553515487152, "compression/movement_sparsity/importance_threshold": -0.001964172808614535, "compression/movement_sparsity/linear_layer_sparsity": 0.7092405955520756, "compression/movement_sparsity/model_sparsity": 0.6848759992155837, "compression_loss": 76.93987274169922, "distillation_loss": 2.959197521209717, "epoch": 3.04, "learning_rate": 3.868225791302715e-05, "loss": 79.9809, "step": 3593, "task_loss": 1.1539011001586914 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7199152262996196, "compression/movement_sparsity/importance_threshold": -0.0019616394820026325, "compression/movement_sparsity/linear_layer_sparsity": 0.7096841268914592, "compression/movement_sparsity/model_sparsity": 0.6853042938889975, "compression_loss": 76.97811889648438, "distillation_loss": 4.976275444030762, "epoch": 3.04, "learning_rate": 3.867756175448483e-05, "loss": 80.3089, "step": 3594, "task_loss": 3.1354384422302246 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7202766259630206, "compression/movement_sparsity/importance_threshold": -0.0019591083345962823, "compression/movement_sparsity/linear_layer_sparsity": 0.7101547141672085, "compression/movement_sparsity/model_sparsity": 0.6857587150441291, "compression_loss": 77.0163345336914, "distillation_loss": 3.236546754837036, "epoch": 3.04, "learning_rate": 3.867286559594252e-05, "loss": 80.0372, "step": 3595, "task_loss": 1.638723373413086 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7206377146112397, "compression/movement_sparsity/importance_threshold": -0.0019565793654577916, "compression/movement_sparsity/linear_layer_sparsity": 0.7106480289064718, "compression/movement_sparsity/model_sparsity": 0.686235082904485, "compression_loss": 77.05443572998047, "distillation_loss": 3.1052932739257812, "epoch": 3.04, "learning_rate": 3.866816943740021e-05, "loss": 80.1409, "step": 3596, "task_loss": 1.676505208015442 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7209984923781626, "compression/movement_sparsity/importance_threshold": -0.001954052573649459, "compression/movement_sparsity/linear_layer_sparsity": 0.7110364109705395, "compression/movement_sparsity/model_sparsity": 0.686610122849849, "compression_loss": 77.0925064086914, "distillation_loss": 2.44527268409729, "epoch": 3.04, "learning_rate": 3.8663473278857896e-05, "loss": 79.7299, "step": 3597, "task_loss": 3.414745330810547 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7213589593976738, "compression/movement_sparsity/importance_threshold": -0.0019515279582335938, "compression/movement_sparsity/linear_layer_sparsity": 0.7113079004192728, "compression/movement_sparsity/model_sparsity": 0.686872285800819, "compression_loss": 77.13054656982422, "distillation_loss": 2.2238564491271973, "epoch": 3.04, "learning_rate": 3.865877712031558e-05, "loss": 80.0305, "step": 3598, "task_loss": 2.557774782180786 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7217191158036588, "compression/movement_sparsity/importance_threshold": -0.0019490055182724989, "compression/movement_sparsity/linear_layer_sparsity": 0.7118512608901039, "compression/movement_sparsity/model_sparsity": 0.6873969801679047, "compression_loss": 77.16860961914062, "distillation_loss": 3.12207293510437, "epoch": 3.04, "learning_rate": 3.865408096177327e-05, "loss": 80.1855, "step": 3599, "task_loss": 1.5262266397476196 }, { "compression/movement_sparsity/importance_regularization_factor": 0.722078961730003, "compression/movement_sparsity/importance_threshold": -0.0019464852528284738, "compression/movement_sparsity/linear_layer_sparsity": 0.7122738295427835, "compression/movement_sparsity/model_sparsity": 0.6878050322873916, "compression_loss": 77.20663452148438, "distillation_loss": 3.6043689250946045, "epoch": 3.04, "learning_rate": 3.864938480323096e-05, "loss": 80.1713, "step": 3600, "task_loss": 2.2894561290740967 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7224384973105908, "compression/movement_sparsity/importance_threshold": -0.0019439671609638286, "compression/movement_sparsity/linear_layer_sparsity": 0.7125002694861886, "compression/movement_sparsity/model_sparsity": 0.68802369332213, "compression_loss": 77.24468231201172, "distillation_loss": 3.0752198696136475, "epoch": 3.04, "learning_rate": 3.864468864468865e-05, "loss": 80.2729, "step": 3601, "task_loss": 2.9824411869049072 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7227977226793083, "compression/movement_sparsity/importance_threshold": -0.001941451241740862, "compression/movement_sparsity/linear_layer_sparsity": 0.71290038493121, "compression/movement_sparsity/model_sparsity": 0.6884100635707158, "compression_loss": 77.28262329101562, "distillation_loss": 3.17338228225708, "epoch": 3.04, "learning_rate": 3.8639992486146335e-05, "loss": 80.1684, "step": 3602, "task_loss": 2.5938024520874023 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7231566379700398, "compression/movement_sparsity/importance_threshold": -0.0019389374942218813, "compression/movement_sparsity/linear_layer_sparsity": 0.7132567029085048, "compression/movement_sparsity/model_sparsity": 0.6887541409293283, "compression_loss": 77.320556640625, "distillation_loss": 4.08018684387207, "epoch": 3.05, "learning_rate": 3.863529632760402e-05, "loss": 80.3788, "step": 3603, "task_loss": 2.2018134593963623 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7235152433166709, "compression/movement_sparsity/importance_threshold": -0.0019364259174691895, "compression/movement_sparsity/linear_layer_sparsity": 0.7137803288818985, "compression/movement_sparsity/model_sparsity": 0.6892597787396739, "compression_loss": 77.3584213256836, "distillation_loss": 3.6879143714904785, "epoch": 3.05, "learning_rate": 3.863060016906171e-05, "loss": 80.3016, "step": 3604, "task_loss": 2.270658016204834 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7238735388530865, "compression/movement_sparsity/importance_threshold": -0.0019339165105450922, "compression/movement_sparsity/linear_layer_sparsity": 0.7141935251388164, "compression/movement_sparsity/model_sparsity": 0.6896587804340264, "compression_loss": 77.39633178710938, "distillation_loss": 3.4445180892944336, "epoch": 3.05, "learning_rate": 3.86259040105194e-05, "loss": 80.0609, "step": 3605, "task_loss": 1.3628276586532593 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7242315247131721, "compression/movement_sparsity/importance_threshold": -0.0019314092725118892, "compression/movement_sparsity/linear_layer_sparsity": 0.7147066339963553, "compression/movement_sparsity/model_sparsity": 0.6901542624238013, "compression_loss": 77.43413543701172, "distillation_loss": 2.320568561553955, "epoch": 3.05, "learning_rate": 3.862120785197709e-05, "loss": 80.0468, "step": 3606, "task_loss": 1.0154718160629272 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7245892010308121, "compression/movement_sparsity/importance_threshold": -0.0019289042024318902, "compression/movement_sparsity/linear_layer_sparsity": 0.714971755939571, "compression/movement_sparsity/model_sparsity": 0.6904102766126571, "compression_loss": 77.47197723388672, "distillation_loss": 4.382578372955322, "epoch": 3.05, "learning_rate": 3.861651169343477e-05, "loss": 80.7576, "step": 3607, "task_loss": 1.8668166399002075 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7249465679398923, "compression/movement_sparsity/importance_threshold": -0.0019264012993673958, "compression/movement_sparsity/linear_layer_sparsity": 0.7154117338769991, "compression/movement_sparsity/model_sparsity": 0.690835139954404, "compression_loss": 77.50968170166016, "distillation_loss": 4.620975971221924, "epoch": 3.05, "learning_rate": 3.861181553489246e-05, "loss": 81.0878, "step": 3608, "task_loss": 2.8663976192474365 }, { "compression/movement_sparsity/importance_regularization_factor": 0.725303625574298, "compression/movement_sparsity/importance_threshold": -0.001923900562380709, "compression/movement_sparsity/linear_layer_sparsity": 0.7157900280952468, "compression/movement_sparsity/model_sparsity": 0.6912004386024858, "compression_loss": 77.54743957519531, "distillation_loss": 3.309844970703125, "epoch": 3.05, "learning_rate": 3.8607119376350146e-05, "loss": 80.7841, "step": 3609, "task_loss": 1.6952966451644897 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7256603740679137, "compression/movement_sparsity/importance_threshold": -0.0019214019905341353, "compression/movement_sparsity/linear_layer_sparsity": 0.7162374228649444, "compression/movement_sparsity/model_sparsity": 0.691632463985497, "compression_loss": 77.58517456054688, "distillation_loss": 4.154474258422852, "epoch": 3.05, "learning_rate": 3.860242321780784e-05, "loss": 80.8587, "step": 3610, "task_loss": 1.9240983724594116 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7260168135546251, "compression/movement_sparsity/importance_threshold": -0.0019189055828899779, "compression/movement_sparsity/linear_layer_sparsity": 0.716595779874905, "compression/movement_sparsity/model_sparsity": 0.6919785103297303, "compression_loss": 77.62287139892578, "distillation_loss": 2.423325777053833, "epoch": 3.05, "learning_rate": 3.859772705926552e-05, "loss": 80.1838, "step": 3611, "task_loss": 1.3057798147201538 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7263729441683169, "compression/movement_sparsity/importance_threshold": -0.0019164113385105423, "compression/movement_sparsity/linear_layer_sparsity": 0.7169204153387914, "compression/movement_sparsity/model_sparsity": 0.6922919935667368, "compression_loss": 77.66051483154297, "distillation_loss": 4.885219573974609, "epoch": 3.05, "learning_rate": 3.859303090072321e-05, "loss": 81.002, "step": 3612, "task_loss": 2.5745415687561035 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7267287660428745, "compression/movement_sparsity/importance_threshold": -0.0019139192564581308, "compression/movement_sparsity/linear_layer_sparsity": 0.7173174185760598, "compression/movement_sparsity/model_sparsity": 0.6926753585214802, "compression_loss": 77.69811248779297, "distillation_loss": 2.904564380645752, "epoch": 3.05, "learning_rate": 3.85883347421809e-05, "loss": 81.0122, "step": 3613, "task_loss": 1.4313076734542847 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7270842793121828, "compression/movement_sparsity/importance_threshold": -0.0019114293357950498, "compression/movement_sparsity/linear_layer_sparsity": 0.7177054071425955, "compression/movement_sparsity/model_sparsity": 0.693050018487163, "compression_loss": 77.7357406616211, "distillation_loss": 2.5375521183013916, "epoch": 3.05, "learning_rate": 3.8583638583638584e-05, "loss": 80.1462, "step": 3614, "task_loss": 1.7872376441955566 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7274394841101275, "compression/movement_sparsity/importance_threshold": -0.001908941575583599, "compression/movement_sparsity/linear_layer_sparsity": 0.7180883517862212, "compression/movement_sparsity/model_sparsity": 0.6934198078042046, "compression_loss": 77.77330780029297, "distillation_loss": 1.942220687866211, "epoch": 3.06, "learning_rate": 3.857894242509627e-05, "loss": 80.648, "step": 3615, "task_loss": 1.4977554082870483 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7277943805705929, "compression/movement_sparsity/importance_threshold": -0.0019064559748860873, "compression/movement_sparsity/linear_layer_sparsity": 0.7183394747566325, "compression/movement_sparsity/model_sparsity": 0.6936623039280376, "compression_loss": 77.81082153320312, "distillation_loss": 2.5870237350463867, "epoch": 3.06, "learning_rate": 3.857424626655396e-05, "loss": 80.8943, "step": 3616, "task_loss": 1.488090991973877 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7281489688274649, "compression/movement_sparsity/importance_threshold": -0.0019039725327648143, "compression/movement_sparsity/linear_layer_sparsity": 0.718707299555696, "compression/movement_sparsity/model_sparsity": 0.6940174928136918, "compression_loss": 77.84829711914062, "distillation_loss": 3.1270554065704346, "epoch": 3.06, "learning_rate": 3.856955010801165e-05, "loss": 80.6975, "step": 3617, "task_loss": 2.2518465518951416 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7285032490146282, "compression/movement_sparsity/importance_threshold": -0.0019014912482820884, "compression/movement_sparsity/linear_layer_sparsity": 0.7192190967547949, "compression/movement_sparsity/model_sparsity": 0.6945117082045293, "compression_loss": 77.8857650756836, "distillation_loss": 2.673177719116211, "epoch": 3.06, "learning_rate": 3.8564853949469336e-05, "loss": 80.9399, "step": 3618, "task_loss": 1.6172763109207153 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7288572212659681, "compression/movement_sparsity/importance_threshold": -0.00189901212050021, "compression/movement_sparsity/linear_layer_sparsity": 0.7197790914394779, "compression/movement_sparsity/model_sparsity": 0.6950524653490484, "compression_loss": 77.92314910888672, "distillation_loss": 2.8687996864318848, "epoch": 3.06, "learning_rate": 3.856015779092703e-05, "loss": 81.7723, "step": 3619, "task_loss": 1.2039071321487427 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7292108857153698, "compression/movement_sparsity/importance_threshold": -0.0018965351484814827, "compression/movement_sparsity/linear_layer_sparsity": 0.7201681651052685, "compression/movement_sparsity/model_sparsity": 0.6954281731374884, "compression_loss": 77.96052551269531, "distillation_loss": 3.458604335784912, "epoch": 3.06, "learning_rate": 3.855546163238471e-05, "loss": 80.8898, "step": 3620, "task_loss": 1.9407539367675781 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7295642424967181, "compression/movement_sparsity/importance_threshold": -0.001894060331288215, "compression/movement_sparsity/linear_layer_sparsity": 0.7204810314156982, "compression/movement_sparsity/model_sparsity": 0.6957302915276656, "compression_loss": 77.99787902832031, "distillation_loss": 4.6278605461120605, "epoch": 3.06, "learning_rate": 3.8550765473842395e-05, "loss": 82.0177, "step": 3621, "task_loss": 2.6036248207092285 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7299172917438987, "compression/movement_sparsity/importance_threshold": -0.0018915876679827066, "compression/movement_sparsity/linear_layer_sparsity": 0.7208555695211407, "compression/movement_sparsity/model_sparsity": 0.6960919630969721, "compression_loss": 78.03522491455078, "distillation_loss": 4.99056339263916, "epoch": 3.06, "learning_rate": 3.854606931530009e-05, "loss": 81.5843, "step": 3622, "task_loss": 2.6957998275756836 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7302700335907963, "compression/movement_sparsity/importance_threshold": -0.0018891171576272629, "compression/movement_sparsity/linear_layer_sparsity": 0.7212296068115425, "compression/movement_sparsity/model_sparsity": 0.6964531510557753, "compression_loss": 78.072509765625, "distillation_loss": 3.8841280937194824, "epoch": 3.06, "learning_rate": 3.8541373156757775e-05, "loss": 81.0477, "step": 3623, "task_loss": 1.9544757604599 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7306224681712964, "compression/movement_sparsity/importance_threshold": -0.001886648799284187, "compression/movement_sparsity/linear_layer_sparsity": 0.7216126945451798, "compression/movement_sparsity/model_sparsity": 0.6968230785472465, "compression_loss": 78.10975646972656, "distillation_loss": 3.145758628845215, "epoch": 3.06, "learning_rate": 3.853667699821546e-05, "loss": 81.7043, "step": 3624, "task_loss": 2.4154679775238037 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7309745956192836, "compression/movement_sparsity/importance_threshold": -0.0018841825920157852, "compression/movement_sparsity/linear_layer_sparsity": 0.7219546081279705, "compression/movement_sparsity/model_sparsity": 0.6971532463466191, "compression_loss": 78.14703369140625, "distillation_loss": 2.746878147125244, "epoch": 3.06, "learning_rate": 3.853198083967315e-05, "loss": 81.6506, "step": 3625, "task_loss": 1.4255664348602295 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7313264160686437, "compression/movement_sparsity/importance_threshold": -0.0018817185348843574, "compression/movement_sparsity/linear_layer_sparsity": 0.7223441349121312, "compression/movement_sparsity/model_sparsity": 0.6975293916874195, "compression_loss": 78.18419647216797, "distillation_loss": 4.186704158782959, "epoch": 3.07, "learning_rate": 3.852728468113084e-05, "loss": 81.5808, "step": 3626, "task_loss": 2.326265335083008 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7316779296532612, "compression/movement_sparsity/importance_threshold": -0.0018792566269522125, "compression/movement_sparsity/linear_layer_sparsity": 0.7226935964930356, "compression/movement_sparsity/model_sparsity": 0.69786684818795, "compression_loss": 78.22138214111328, "distillation_loss": 3.167693853378296, "epoch": 3.07, "learning_rate": 3.852258852258853e-05, "loss": 81.2427, "step": 3627, "task_loss": 1.7537399530410767 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7320291365070214, "compression/movement_sparsity/importance_threshold": -0.0018767968672816528, "compression/movement_sparsity/linear_layer_sparsity": 0.7231088794792897, "compression/movement_sparsity/model_sparsity": 0.6982678649260665, "compression_loss": 78.25849151611328, "distillation_loss": 3.4511637687683105, "epoch": 3.07, "learning_rate": 3.8517892364046206e-05, "loss": 81.4805, "step": 3628, "task_loss": 2.6254754066467285 }, { "compression/movement_sparsity/importance_regularization_factor": 0.73238003676381, "compression/movement_sparsity/importance_threshold": -0.0018743392549349794, "compression/movement_sparsity/linear_layer_sparsity": 0.7234880680101101, "compression/movement_sparsity/model_sparsity": 0.6986340271643329, "compression_loss": 78.29554748535156, "distillation_loss": 4.300675392150879, "epoch": 3.07, "learning_rate": 3.85131962055039e-05, "loss": 81.9237, "step": 3629, "task_loss": 2.7455594539642334 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7327306305575114, "compression/movement_sparsity/importance_threshold": -0.0018718837889744998, "compression/movement_sparsity/linear_layer_sparsity": 0.7239467668907266, "compression/movement_sparsity/model_sparsity": 0.6990769683272773, "compression_loss": 78.33262634277344, "distillation_loss": 2.9245405197143555, "epoch": 3.07, "learning_rate": 3.8508500046961586e-05, "loss": 80.9703, "step": 3630, "task_loss": 0.8655033707618713 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7330809180220114, "compression/movement_sparsity/importance_threshold": -0.0018694304684625154, "compression/movement_sparsity/linear_layer_sparsity": 0.7243793518442204, "compression/movement_sparsity/model_sparsity": 0.6994946926568316, "compression_loss": 78.36958312988281, "distillation_loss": 2.9687905311584473, "epoch": 3.07, "learning_rate": 3.850380388841928e-05, "loss": 81.3497, "step": 3631, "task_loss": 2.8129398822784424 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7334308992911944, "compression/movement_sparsity/importance_threshold": -0.0018669792924613333, "compression/movement_sparsity/linear_layer_sparsity": 0.724774876484702, "compression/movement_sparsity/model_sparsity": 0.6998766298091366, "compression_loss": 78.4065170288086, "distillation_loss": 4.348560810089111, "epoch": 3.07, "learning_rate": 3.849910772987696e-05, "loss": 82.2525, "step": 3632, "task_loss": 2.106330633163452 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7337805744989463, "compression/movement_sparsity/importance_threshold": -0.0018645302600332541, "compression/movement_sparsity/linear_layer_sparsity": 0.7250536635240284, "compression/movement_sparsity/model_sparsity": 0.700145839656013, "compression_loss": 78.44348907470703, "distillation_loss": 4.152263641357422, "epoch": 3.07, "learning_rate": 3.849441157133465e-05, "loss": 82.0938, "step": 3633, "task_loss": 1.9087483882904053 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7341299437791516, "compression/movement_sparsity/importance_threshold": -0.0018620833702405843, "compression/movement_sparsity/linear_layer_sparsity": 0.7254442634832764, "compression/movement_sparsity/model_sparsity": 0.7005230213050347, "compression_loss": 78.48028564453125, "distillation_loss": 3.975745439529419, "epoch": 3.07, "learning_rate": 3.848971541279234e-05, "loss": 81.7384, "step": 3634, "task_loss": 2.539940595626831 }, { "compression/movement_sparsity/importance_regularization_factor": 0.734479007265696, "compression/movement_sparsity/importance_threshold": -0.0018596386221456252, "compression/movement_sparsity/linear_layer_sparsity": 0.7257643200667906, "compression/movement_sparsity/model_sparsity": 0.700832082960296, "compression_loss": 78.51718139648438, "distillation_loss": 4.288151741027832, "epoch": 3.07, "learning_rate": 3.8485019254250024e-05, "loss": 81.735, "step": 3635, "task_loss": 2.3239364624023438 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7348277650924642, "compression/movement_sparsity/importance_threshold": -0.001857196014810685, "compression/movement_sparsity/linear_layer_sparsity": 0.726067635118944, "compression/movement_sparsity/model_sparsity": 0.7011249782073017, "compression_loss": 78.55399322509766, "distillation_loss": 2.8438634872436523, "epoch": 3.07, "learning_rate": 3.848032309570772e-05, "loss": 81.3079, "step": 3636, "task_loss": 1.6512243747711182 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7351762173933416, "compression/movement_sparsity/importance_threshold": -0.0018547555472980632, "compression/movement_sparsity/linear_layer_sparsity": 0.7263469825941494, "compression/movement_sparsity/model_sparsity": 0.7013947292373603, "compression_loss": 78.59081268310547, "distillation_loss": 2.2527084350585938, "epoch": 3.07, "learning_rate": 3.84756269371654e-05, "loss": 81.3794, "step": 3637, "task_loss": 1.198965311050415 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7355243643022131, "compression/movement_sparsity/importance_threshold": -0.0018523172186700665, "compression/movement_sparsity/linear_layer_sparsity": 0.7266417241697727, "compression/movement_sparsity/model_sparsity": 0.7016793455331298, "compression_loss": 78.62754821777344, "distillation_loss": 3.71903657913208, "epoch": 3.08, "learning_rate": 3.847093077862309e-05, "loss": 81.5244, "step": 3638, "task_loss": 2.1009232997894287 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7358722059529639, "compression/movement_sparsity/importance_threshold": -0.0018498810279889994, "compression/movement_sparsity/linear_layer_sparsity": 0.726897592958903, "compression/movement_sparsity/model_sparsity": 0.7019264244422091, "compression_loss": 78.66429901123047, "distillation_loss": 3.2995386123657227, "epoch": 3.08, "learning_rate": 3.8466234620080777e-05, "loss": 81.7971, "step": 3639, "task_loss": 1.203376054763794 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7362197424794796, "compression/movement_sparsity/importance_threshold": -0.0018474469743171608, "compression/movement_sparsity/linear_layer_sparsity": 0.7272286078524746, "compression/movement_sparsity/model_sparsity": 0.7022460679558655, "compression_loss": 78.70101165771484, "distillation_loss": 2.769207000732422, "epoch": 3.08, "learning_rate": 3.846153846153846e-05, "loss": 82.1276, "step": 3640, "task_loss": 2.1956887245178223 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7365669740156446, "compression/movement_sparsity/importance_threshold": -0.0018450150567168623, "compression/movement_sparsity/linear_layer_sparsity": 0.7275413906937309, "compression/movement_sparsity/model_sparsity": 0.7025481057442922, "compression_loss": 78.73765563964844, "distillation_loss": 3.2514421939849854, "epoch": 3.08, "learning_rate": 3.845684230299615e-05, "loss": 82.4288, "step": 3641, "task_loss": 2.4310896396636963 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7369139006953448, "compression/movement_sparsity/importance_threshold": -0.001842585274250401, "compression/movement_sparsity/linear_layer_sparsity": 0.7279702791552576, "compression/movement_sparsity/model_sparsity": 0.7029622605677502, "compression_loss": 78.77429962158203, "distillation_loss": 1.6661925315856934, "epoch": 3.08, "learning_rate": 3.8452146144453836e-05, "loss": 82.3089, "step": 3642, "task_loss": 1.5886659622192383 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7372605226524647, "compression/movement_sparsity/importance_threshold": -0.0018401576259800857, "compression/movement_sparsity/linear_layer_sparsity": 0.7283275510659634, "compression/movement_sparsity/model_sparsity": 0.7033072590892262, "compression_loss": 78.81092834472656, "distillation_loss": 3.1796875, "epoch": 3.08, "learning_rate": 3.844744998591153e-05, "loss": 82.006, "step": 3643, "task_loss": 2.089752435684204 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7376068400208897, "compression/movement_sparsity/importance_threshold": -0.001837732110968218, "compression/movement_sparsity/linear_layer_sparsity": 0.7286816153755751, "compression/movement_sparsity/model_sparsity": 0.7036491602005736, "compression_loss": 78.84752655029297, "distillation_loss": 2.6766669750213623, "epoch": 3.08, "learning_rate": 3.8442753827369215e-05, "loss": 81.9107, "step": 3644, "task_loss": 1.5094844102859497 }, { "compression/movement_sparsity/importance_regularization_factor": 0.737952852934505, "compression/movement_sparsity/importance_threshold": -0.0018353087282771026, "compression/movement_sparsity/linear_layer_sparsity": 0.7290775573619238, "compression/movement_sparsity/model_sparsity": 0.7040315003616313, "compression_loss": 78.88410186767578, "distillation_loss": 3.699563980102539, "epoch": 3.08, "learning_rate": 3.84380576688269e-05, "loss": 81.8898, "step": 3645, "task_loss": 1.863808035850525 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7382985615271958, "compression/movement_sparsity/importance_threshold": -0.0018328874769690424, "compression/movement_sparsity/linear_layer_sparsity": 0.7294015727690931, "compression/movement_sparsity/model_sparsity": 0.7043443848427763, "compression_loss": 78.92060089111328, "distillation_loss": 3.2535645961761475, "epoch": 3.08, "learning_rate": 3.843336151028459e-05, "loss": 81.9191, "step": 3646, "task_loss": 1.5681560039520264 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7386439659328468, "compression/movement_sparsity/importance_threshold": -0.001830468356106345, "compression/movement_sparsity/linear_layer_sparsity": 0.7298259896677565, "compression/movement_sparsity/model_sparsity": 0.7047542217153114, "compression_loss": 78.95710754394531, "distillation_loss": 3.8319528102874756, "epoch": 3.08, "learning_rate": 3.8428665351742274e-05, "loss": 81.7564, "step": 3647, "task_loss": 1.8651117086410522 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7389890662853433, "compression/movement_sparsity/importance_threshold": -0.0018280513647513114, "compression/movement_sparsity/linear_layer_sparsity": 0.7302781183320055, "compression/movement_sparsity/model_sparsity": 0.705190818369033, "compression_loss": 78.99356079101562, "distillation_loss": 3.7793290615081787, "epoch": 3.08, "learning_rate": 3.842396919319997e-05, "loss": 82.434, "step": 3648, "task_loss": 1.4207701683044434 }, { "compression/movement_sparsity/importance_regularization_factor": 0.739333862718571, "compression/movement_sparsity/importance_threshold": -0.0018256365019662431, "compression/movement_sparsity/linear_layer_sparsity": 0.7306252904727236, "compression/movement_sparsity/model_sparsity": 0.7055260640786911, "compression_loss": 79.02995300292969, "distillation_loss": 2.865920066833496, "epoch": 3.08, "learning_rate": 3.8419273034657653e-05, "loss": 82.3231, "step": 3649, "task_loss": 1.8904931545257568 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7396783553664146, "compression/movement_sparsity/importance_threshold": -0.0018232237668134484, "compression/movement_sparsity/linear_layer_sparsity": 0.7310068280645683, "compression/movement_sparsity/model_sparsity": 0.705894494680509, "compression_loss": 79.06633758544922, "distillation_loss": 3.8554601669311523, "epoch": 3.09, "learning_rate": 3.841457687611534e-05, "loss": 82.5304, "step": 3650, "task_loss": 2.560987949371338 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7400225443627593, "compression/movement_sparsity/importance_threshold": -0.0018208131583552292, "compression/movement_sparsity/linear_layer_sparsity": 0.7312806546501582, "compression/movement_sparsity/model_sparsity": 0.7061589144804948, "compression_loss": 79.10265350341797, "distillation_loss": 1.7472658157348633, "epoch": 3.09, "learning_rate": 3.8409880717573026e-05, "loss": 82.0822, "step": 3651, "task_loss": 1.467380404472351 }, { "compression/movement_sparsity/importance_regularization_factor": 0.74036642984149, "compression/movement_sparsity/importance_threshold": -0.0018184046756538905, "compression/movement_sparsity/linear_layer_sparsity": 0.7316225443846137, "compression/movement_sparsity/model_sparsity": 0.7064890592507959, "compression_loss": 79.13899230957031, "distillation_loss": 4.3007049560546875, "epoch": 3.09, "learning_rate": 3.840518455903071e-05, "loss": 82.6115, "step": 3652, "task_loss": 2.3277735710144043 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7407100119364922, "compression/movement_sparsity/importance_threshold": -0.001815998317771736, "compression/movement_sparsity/linear_layer_sparsity": 0.7320303151452573, "compression/movement_sparsity/model_sparsity": 0.7068828218313619, "compression_loss": 79.17524719238281, "distillation_loss": 3.8125357627868652, "epoch": 3.09, "learning_rate": 3.8400488400488406e-05, "loss": 82.2469, "step": 3653, "task_loss": 2.9564433097839355 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7410532907816508, "compression/movement_sparsity/importance_threshold": -0.00181359408377107, "compression/movement_sparsity/linear_layer_sparsity": 0.7323878016909805, "compression/movement_sparsity/model_sparsity": 0.7072280276144821, "compression_loss": 79.21153259277344, "distillation_loss": 3.332451343536377, "epoch": 3.09, "learning_rate": 3.8395792241946085e-05, "loss": 82.7035, "step": 3654, "task_loss": 2.9230785369873047 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7413962665108513, "compression/movement_sparsity/importance_threshold": -0.0018111919727141932, "compression/movement_sparsity/linear_layer_sparsity": 0.732878588506705, "compression/movement_sparsity/model_sparsity": 0.7077019543932497, "compression_loss": 79.24777221679688, "distillation_loss": 2.7436177730560303, "epoch": 3.09, "learning_rate": 3.839109608340378e-05, "loss": 82.1883, "step": 3655, "task_loss": 1.7471091747283936 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7417389392579783, "compression/movement_sparsity/importance_threshold": -0.0018087919836634152, "compression/movement_sparsity/linear_layer_sparsity": 0.7331544541249607, "compression/movement_sparsity/model_sparsity": 0.7079683431788564, "compression_loss": 79.28392791748047, "distillation_loss": 3.8545992374420166, "epoch": 3.09, "learning_rate": 3.8386399924861465e-05, "loss": 82.9038, "step": 3656, "task_loss": 2.3255913257598877 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7420813091569174, "compression/movement_sparsity/importance_threshold": -0.0018063941156810362, "compression/movement_sparsity/linear_layer_sparsity": 0.7335390562278878, "compression/movement_sparsity/model_sparsity": 0.7083397330163734, "compression_loss": 79.32008361816406, "distillation_loss": 3.248971939086914, "epoch": 3.09, "learning_rate": 3.838170376631916e-05, "loss": 83.0756, "step": 3657, "task_loss": 1.86123788356781 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7424233763415533, "compression/movement_sparsity/importance_threshold": -0.0018039983678293618, "compression/movement_sparsity/linear_layer_sparsity": 0.7339600508904396, "compression/movement_sparsity/model_sparsity": 0.7087462652171356, "compression_loss": 79.35621643066406, "distillation_loss": 4.9636030197143555, "epoch": 3.09, "learning_rate": 3.837700760777684e-05, "loss": 83.3769, "step": 3658, "task_loss": 3.1896374225616455 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7427651409457716, "compression/movement_sparsity/importance_threshold": -0.0018016047391706943, "compression/movement_sparsity/linear_layer_sparsity": 0.7343515570864279, "compression/movement_sparsity/model_sparsity": 0.7091243219708777, "compression_loss": 79.39234924316406, "distillation_loss": 3.953709125518799, "epoch": 3.09, "learning_rate": 3.837231144923453e-05, "loss": 83.1929, "step": 3659, "task_loss": 2.3795108795166016 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7431066031034573, "compression/movement_sparsity/importance_threshold": -0.0017992132287673384, "compression/movement_sparsity/linear_layer_sparsity": 0.7347732910473731, "compression/movement_sparsity/model_sparsity": 0.709531568072859, "compression_loss": 79.42837524414062, "distillation_loss": 5.477741241455078, "epoch": 3.09, "learning_rate": 3.836761529069222e-05, "loss": 83.1869, "step": 3660, "task_loss": 2.803633451461792 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7434477629484952, "compression/movement_sparsity/importance_threshold": -0.0017968238356815998, "compression/movement_sparsity/linear_layer_sparsity": 0.7351081693712587, "compression/movement_sparsity/model_sparsity": 0.7098549422961129, "compression_loss": 79.46440124511719, "distillation_loss": 4.0181097984313965, "epoch": 3.09, "learning_rate": 3.83629191321499e-05, "loss": 82.752, "step": 3661, "task_loss": 2.931694984436035 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7437886206147711, "compression/movement_sparsity/importance_threshold": -0.0017944365589757788, "compression/movement_sparsity/linear_layer_sparsity": 0.7354431430884854, "compression/movement_sparsity/model_sparsity": 0.7101784086356532, "compression_loss": 79.50037384033203, "distillation_loss": 4.247005462646484, "epoch": 3.1, "learning_rate": 3.835822297360759e-05, "loss": 82.8285, "step": 3662, "task_loss": 2.5462474822998047 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7441291762361695, "compression/movement_sparsity/importance_threshold": -0.0017920513977121837, "compression/movement_sparsity/linear_layer_sparsity": 0.7359013530782287, "compression/movement_sparsity/model_sparsity": 0.7106208777026299, "compression_loss": 79.536376953125, "distillation_loss": 4.422715663909912, "epoch": 3.1, "learning_rate": 3.8353526815065276e-05, "loss": 83.3995, "step": 3663, "task_loss": 3.2990036010742188 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7444694299465761, "compression/movement_sparsity/importance_threshold": -0.0017896683509531141, "compression/movement_sparsity/linear_layer_sparsity": 0.7363274393603609, "compression/movement_sparsity/model_sparsity": 0.7110323266101762, "compression_loss": 79.5722427368164, "distillation_loss": 3.418513298034668, "epoch": 3.1, "learning_rate": 3.834883065652297e-05, "loss": 83.3651, "step": 3664, "task_loss": 3.4073374271392822 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7448093818798753, "compression/movement_sparsity/importance_threshold": -0.001787287417760879, "compression/movement_sparsity/linear_layer_sparsity": 0.736693380140938, "compression/movement_sparsity/model_sparsity": 0.7113856961991749, "compression_loss": 79.60807037353516, "distillation_loss": 2.824603796005249, "epoch": 3.1, "learning_rate": 3.8344134497980655e-05, "loss": 82.7088, "step": 3665, "task_loss": 2.347618341445923 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7451490321699531, "compression/movement_sparsity/importance_threshold": -0.0017849085971977765, "compression/movement_sparsity/linear_layer_sparsity": 0.7371311044106829, "compression/movement_sparsity/model_sparsity": 0.7118083832936567, "compression_loss": 79.64395904541016, "distillation_loss": 4.204372406005859, "epoch": 3.1, "learning_rate": 3.833943833943834e-05, "loss": 83.3589, "step": 3666, "task_loss": 1.3465356826782227 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7454883809506939, "compression/movement_sparsity/importance_threshold": -0.0017825318883261162, "compression/movement_sparsity/linear_layer_sparsity": 0.737431188013407, "compression/movement_sparsity/model_sparsity": 0.7120981581014622, "compression_loss": 79.6798095703125, "distillation_loss": 3.618772506713867, "epoch": 3.1, "learning_rate": 3.833474218089603e-05, "loss": 83.5978, "step": 3667, "task_loss": 1.5472601652145386 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7458274283559831, "compression/movement_sparsity/importance_threshold": -0.0017801572902082004, "compression/movement_sparsity/linear_layer_sparsity": 0.7377192401309866, "compression/movement_sparsity/model_sparsity": 0.7123763147426508, "compression_loss": 79.71559143066406, "distillation_loss": 3.541869640350342, "epoch": 3.1, "learning_rate": 3.8330046022353714e-05, "loss": 83.2007, "step": 3668, "task_loss": 1.3770487308502197 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7461661745197063, "compression/movement_sparsity/importance_threshold": -0.0017777848019063297, "compression/movement_sparsity/linear_layer_sparsity": 0.7380524609955706, "compression/movement_sparsity/model_sparsity": 0.7126980884454294, "compression_loss": 79.75137329101562, "distillation_loss": 2.7551233768463135, "epoch": 3.1, "learning_rate": 3.832534986381141e-05, "loss": 82.8538, "step": 3669, "task_loss": 2.6314852237701416 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7465046195757481, "compression/movement_sparsity/importance_threshold": -0.0017754144224828112, "compression/movement_sparsity/linear_layer_sparsity": 0.7383105119073784, "compression/movement_sparsity/model_sparsity": 0.712947274514559, "compression_loss": 79.78717041015625, "distillation_loss": 2.686685562133789, "epoch": 3.1, "learning_rate": 3.8320653705269094e-05, "loss": 83.3442, "step": 3670, "task_loss": 1.5284781455993652 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7468427636579938, "compression/movement_sparsity/importance_threshold": -0.0017730461509999481, "compression/movement_sparsity/linear_layer_sparsity": 0.7386255364921502, "compression/movement_sparsity/model_sparsity": 0.7132514770357149, "compression_loss": 79.82286071777344, "distillation_loss": 4.080593585968018, "epoch": 3.1, "learning_rate": 3.831595754672678e-05, "loss": 83.3257, "step": 3671, "task_loss": 2.463806629180908 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7471806069003284, "compression/movement_sparsity/importance_threshold": -0.001770679986520046, "compression/movement_sparsity/linear_layer_sparsity": 0.7389662934306803, "compression/movement_sparsity/model_sparsity": 0.7135805279251155, "compression_loss": 79.85858154296875, "distillation_loss": 3.674704074859619, "epoch": 3.1, "learning_rate": 3.8311261388184466e-05, "loss": 83.2212, "step": 3672, "task_loss": 2.4121415615081787 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7475181494366374, "compression/movement_sparsity/importance_threshold": -0.0017683159281054054, "compression/movement_sparsity/linear_layer_sparsity": 0.7392809841387584, "compression/movement_sparsity/model_sparsity": 0.7138844080392692, "compression_loss": 79.89419555664062, "distillation_loss": 3.373446226119995, "epoch": 3.1, "learning_rate": 3.830656522964215e-05, "loss": 82.855, "step": 3673, "task_loss": 1.2321401834487915 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7478553914008055, "compression/movement_sparsity/importance_threshold": -0.0017659539748183344, "compression/movement_sparsity/linear_layer_sparsity": 0.7395644812243011, "compression/movement_sparsity/model_sparsity": 0.7141581661277845, "compression_loss": 79.9298324584961, "distillation_loss": 3.794870138168335, "epoch": 3.11, "learning_rate": 3.8301869071099846e-05, "loss": 83.7096, "step": 3674, "task_loss": 1.777003288269043 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7481923329267182, "compression/movement_sparsity/importance_threshold": -0.0017635941257211334, "compression/movement_sparsity/linear_layer_sparsity": 0.7398825583959877, "compression/movement_sparsity/model_sparsity": 0.7144653163701038, "compression_loss": 79.9654312133789, "distillation_loss": 3.687852621078491, "epoch": 3.11, "learning_rate": 3.8297172912557525e-05, "loss": 83.5923, "step": 3675, "task_loss": 2.527103900909424 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7485289741482603, "compression/movement_sparsity/importance_threshold": -0.00176123637987611, "compression/movement_sparsity/linear_layer_sparsity": 0.7402259624997329, "compression/movement_sparsity/model_sparsity": 0.7147969234864509, "compression_loss": 80.00098419189453, "distillation_loss": 4.270294189453125, "epoch": 3.11, "learning_rate": 3.829247675401522e-05, "loss": 83.8908, "step": 3676, "task_loss": 2.380890369415283 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7488653151993174, "compression/movement_sparsity/importance_threshold": -0.0017588807363455626, "compression/movement_sparsity/linear_layer_sparsity": 0.7404894508319826, "compression/movement_sparsity/model_sparsity": 0.7150513601839028, "compression_loss": 80.0364990234375, "distillation_loss": 2.9639081954956055, "epoch": 3.11, "learning_rate": 3.8287780595472905e-05, "loss": 83.9567, "step": 3677, "task_loss": 2.479766607284546 }, { "compression/movement_sparsity/importance_regularization_factor": 0.749201356213774, "compression/movement_sparsity/importance_threshold": -0.0017565271941918023, "compression/movement_sparsity/linear_layer_sparsity": 0.7409115425179569, "compression/movement_sparsity/model_sparsity": 0.715458951721958, "compression_loss": 80.07195281982422, "distillation_loss": 2.8811328411102295, "epoch": 3.11, "learning_rate": 3.828308443693059e-05, "loss": 83.4629, "step": 3678, "task_loss": 1.3239631652832031 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7495370973255158, "compression/movement_sparsity/importance_threshold": -0.0017541757524771285, "compression/movement_sparsity/linear_layer_sparsity": 0.7412424143215167, "compression/movement_sparsity/model_sparsity": 0.7157784570611848, "compression_loss": 80.10746765136719, "distillation_loss": 2.1937625408172607, "epoch": 3.11, "learning_rate": 3.827838827838828e-05, "loss": 83.4617, "step": 3679, "task_loss": 0.9094792604446411 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7498725386684278, "compression/movement_sparsity/importance_threshold": -0.0017518264102638443, "compression/movement_sparsity/linear_layer_sparsity": 0.7415643668476851, "compression/movement_sparsity/model_sparsity": 0.7160893495276375, "compression_loss": 80.14289093017578, "distillation_loss": 4.416505813598633, "epoch": 3.11, "learning_rate": 3.8273692119845964e-05, "loss": 83.4784, "step": 3680, "task_loss": 2.572610378265381 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7502076803763951, "compression/movement_sparsity/importance_threshold": -0.001749479166614257, "compression/movement_sparsity/linear_layer_sparsity": 0.7418542910595834, "compression/movement_sparsity/model_sparsity": 0.7163693139509459, "compression_loss": 80.17829132080078, "distillation_loss": 3.7247090339660645, "epoch": 3.11, "learning_rate": 3.826899596130366e-05, "loss": 83.4595, "step": 3681, "task_loss": 2.460824489593506 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7505425225833029, "compression/movement_sparsity/importance_threshold": -0.001747134020590668, "compression/movement_sparsity/linear_layer_sparsity": 0.7421404949311794, "compression/movement_sparsity/model_sparsity": 0.7166456858390865, "compression_loss": 80.21363830566406, "distillation_loss": 4.373508453369141, "epoch": 3.11, "learning_rate": 3.826429980276134e-05, "loss": 83.7789, "step": 3682, "task_loss": 2.1487691402435303 }, { "compression/movement_sparsity/importance_regularization_factor": 0.750877065423036, "compression/movement_sparsity/importance_threshold": -0.0017447909712553847, "compression/movement_sparsity/linear_layer_sparsity": 0.7425370450500777, "compression/movement_sparsity/model_sparsity": 0.7170286132414697, "compression_loss": 80.24897003173828, "distillation_loss": 3.018580675125122, "epoch": 3.11, "learning_rate": 3.8259603644219036e-05, "loss": 83.8925, "step": 3683, "task_loss": 1.8671622276306152 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7512113090294802, "compression/movement_sparsity/importance_threshold": -0.0017424500176707056, "compression/movement_sparsity/linear_layer_sparsity": 0.7430302166993293, "compression/movement_sparsity/model_sparsity": 0.7175048429273961, "compression_loss": 80.2842788696289, "distillation_loss": 3.3490335941314697, "epoch": 3.11, "learning_rate": 3.8254907485676716e-05, "loss": 84.429, "step": 3684, "task_loss": 1.9273213148117065 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7515452535365199, "compression/movement_sparsity/importance_threshold": -0.0017401111588989408, "compression/movement_sparsity/linear_layer_sparsity": 0.7433543274998398, "compression/movement_sparsity/model_sparsity": 0.7178178195248276, "compression_loss": 80.31957244873047, "distillation_loss": 4.245872974395752, "epoch": 3.11, "learning_rate": 3.82502113271344e-05, "loss": 83.8295, "step": 3685, "task_loss": 1.7592244148254395 }, { "compression/movement_sparsity/importance_regularization_factor": 0.751878899078041, "compression/movement_sparsity/importance_threshold": -0.001737774394002388, "compression/movement_sparsity/linear_layer_sparsity": 0.743680536953854, "compression/movement_sparsity/model_sparsity": 0.7181328226805588, "compression_loss": 80.35487365722656, "distillation_loss": 4.044859409332275, "epoch": 3.12, "learning_rate": 3.8245515168592095e-05, "loss": 83.2428, "step": 3686, "task_loss": 1.762267827987671 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7522122457879278, "compression/movement_sparsity/importance_threshold": -0.0017354397220433583, "compression/movement_sparsity/linear_layer_sparsity": 0.7439549239753229, "compression/movement_sparsity/model_sparsity": 0.718397783663727, "compression_loss": 80.39007568359375, "distillation_loss": 4.115485191345215, "epoch": 3.12, "learning_rate": 3.824081901004978e-05, "loss": 83.6271, "step": 3687, "task_loss": 2.304518222808838 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7525452938000659, "compression/movement_sparsity/importance_threshold": -0.001733107142084151, "compression/movement_sparsity/linear_layer_sparsity": 0.7442654769972313, "compression/movement_sparsity/model_sparsity": 0.7186976682339601, "compression_loss": 80.42536926269531, "distillation_loss": 3.246710777282715, "epoch": 3.12, "learning_rate": 3.823612285150747e-05, "loss": 83.7131, "step": 3688, "task_loss": 1.9812899827957153 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7528780432483407, "compression/movement_sparsity/importance_threshold": -0.0017307766531870685, "compression/movement_sparsity/linear_layer_sparsity": 0.7446811654051851, "compression/movement_sparsity/model_sparsity": 0.7190990764662936, "compression_loss": 80.4605712890625, "distillation_loss": 3.553994655609131, "epoch": 3.12, "learning_rate": 3.8231426692965154e-05, "loss": 83.2559, "step": 3689, "task_loss": 2.5499186515808105 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7532104942666369, "compression/movement_sparsity/importance_threshold": -0.0017284482544144188, "compression/movement_sparsity/linear_layer_sparsity": 0.7450368514015953, "compression/movement_sparsity/model_sparsity": 0.7194425435545089, "compression_loss": 80.49568939208984, "distillation_loss": 3.2814273834228516, "epoch": 3.12, "learning_rate": 3.822673053442285e-05, "loss": 83.4977, "step": 3690, "task_loss": 2.414238929748535 }, { "compression/movement_sparsity/importance_regularization_factor": 0.75354264698884, "compression/movement_sparsity/importance_threshold": -0.0017261219448285034, "compression/movement_sparsity/linear_layer_sparsity": 0.7452704339214142, "compression/movement_sparsity/model_sparsity": 0.7196681017961881, "compression_loss": 80.53082275390625, "distillation_loss": 2.927618980407715, "epoch": 3.12, "learning_rate": 3.8222034375880534e-05, "loss": 84.1464, "step": 3691, "task_loss": 2.2867934703826904 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7538745015488346, "compression/movement_sparsity/importance_threshold": -0.0017237977234916286, "compression/movement_sparsity/linear_layer_sparsity": 0.7455817381658837, "compression/movement_sparsity/model_sparsity": 0.7199687117821763, "compression_loss": 80.56590270996094, "distillation_loss": 4.218554973602295, "epoch": 3.12, "learning_rate": 3.8217338217338214e-05, "loss": 84.183, "step": 3692, "task_loss": 2.631753444671631 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7542060580805066, "compression/movement_sparsity/importance_threshold": -0.001721475589466095, "compression/movement_sparsity/linear_layer_sparsity": 0.745932571026066, "compression/movement_sparsity/model_sparsity": 0.7203074924543232, "compression_loss": 80.6009292602539, "distillation_loss": 2.8070919513702393, "epoch": 3.12, "learning_rate": 3.821264205879591e-05, "loss": 83.7731, "step": 3693, "task_loss": 2.3226654529571533 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7545373167177403, "compression/movement_sparsity/importance_threshold": -0.0017191555418142115, "compression/movement_sparsity/linear_layer_sparsity": 0.7462844770613356, "compression/movement_sparsity/model_sparsity": 0.7206473094346917, "compression_loss": 80.63587188720703, "distillation_loss": 4.210654258728027, "epoch": 3.12, "learning_rate": 3.820794590025359e-05, "loss": 84.3579, "step": 3694, "task_loss": 1.7326912879943848 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7548682775944217, "compression/movement_sparsity/importance_threshold": -0.0017168375795982753, "compression/movement_sparsity/linear_layer_sparsity": 0.746650894808618, "compression/movement_sparsity/model_sparsity": 0.7210011396051221, "compression_loss": 80.67080688476562, "distillation_loss": 3.219837188720703, "epoch": 3.12, "learning_rate": 3.8203249741711286e-05, "loss": 83.559, "step": 3695, "task_loss": 2.899458408355713 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7551989408444351, "compression/movement_sparsity/importance_threshold": -0.0017145217018805971, "compression/movement_sparsity/linear_layer_sparsity": 0.7469993070627703, "compression/movement_sparsity/model_sparsity": 0.7213375828265028, "compression_loss": 80.70569610595703, "distillation_loss": 3.9166975021362305, "epoch": 3.12, "learning_rate": 3.819855358316897e-05, "loss": 83.8487, "step": 3696, "task_loss": 3.231672763824463 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7555293066016664, "compression/movement_sparsity/importance_threshold": -0.0017122079077234757, "compression/movement_sparsity/linear_layer_sparsity": 0.7471278376657173, "compression/movement_sparsity/model_sparsity": 0.7214616980078322, "compression_loss": 80.7405776977539, "distillation_loss": 4.193362712860107, "epoch": 3.13, "learning_rate": 3.819385742462666e-05, "loss": 84.9088, "step": 3697, "task_loss": 3.107306718826294 }, { "compression/movement_sparsity/importance_regularization_factor": 0.755859375, "compression/movement_sparsity/importance_threshold": -0.00170989619618922, "compression/movement_sparsity/linear_layer_sparsity": 0.7475332116686662, "compression/movement_sparsity/model_sparsity": 0.7218531461667034, "compression_loss": 80.77540588378906, "distillation_loss": 5.4476823806762695, "epoch": 3.13, "learning_rate": 3.8189161266084345e-05, "loss": 85.152, "step": 3698, "task_loss": 3.258300542831421 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7561891461733214, "compression/movement_sparsity/importance_threshold": -0.0017075865663401316, "compression/movement_sparsity/linear_layer_sparsity": 0.7479199124250971, "compression/movement_sparsity/model_sparsity": 0.7222265625625204, "compression_loss": 80.81019592285156, "distillation_loss": 3.840869665145874, "epoch": 3.13, "learning_rate": 3.818446510754203e-05, "loss": 84.3903, "step": 3699, "task_loss": 3.404210329055786 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7565186202555162, "compression/movement_sparsity/importance_threshold": -0.0017052790172385115, "compression/movement_sparsity/linear_layer_sparsity": 0.7482691951434869, "compression/movement_sparsity/model_sparsity": 0.722563846345014, "compression_loss": 80.8449478149414, "distillation_loss": 4.6749067306518555, "epoch": 3.13, "learning_rate": 3.8179768948999725e-05, "loss": 84.6341, "step": 3700, "task_loss": 2.163327693939209 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7568477973804688, "compression/movement_sparsity/importance_threshold": -0.0017029735479466672, "compression/movement_sparsity/linear_layer_sparsity": 0.7484824827299895, "compression/movement_sparsity/model_sparsity": 0.722769806846771, "compression_loss": 80.87963104248047, "distillation_loss": 4.222968101501465, "epoch": 3.13, "learning_rate": 3.8175072790457404e-05, "loss": 85.2322, "step": 3701, "task_loss": 3.54156756401062 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7571766776820649, "compression/movement_sparsity/importance_threshold": -0.0017006701575269017, "compression/movement_sparsity/linear_layer_sparsity": 0.7488716994857917, "compression/movement_sparsity/model_sparsity": 0.7231456528096406, "compression_loss": 80.91436767578125, "distillation_loss": 4.028522491455078, "epoch": 3.13, "learning_rate": 3.81703766319151e-05, "loss": 84.5291, "step": 3702, "task_loss": 2.719282627105713 }, { "compression/movement_sparsity/importance_regularization_factor": 0.757505261294189, "compression/movement_sparsity/importance_threshold": -0.0016983688450415207, "compression/movement_sparsity/linear_layer_sparsity": 0.7492360424279055, "compression/movement_sparsity/model_sparsity": 0.7234974794508428, "compression_loss": 80.94901275634766, "distillation_loss": 2.947445869445801, "epoch": 3.13, "learning_rate": 3.8165680473372784e-05, "loss": 84.4737, "step": 3703, "task_loss": 1.441405177116394 }, { "compression/movement_sparsity/importance_regularization_factor": 0.757833548350727, "compression/movement_sparsity/importance_threshold": -0.0016960696095528246, "compression/movement_sparsity/linear_layer_sparsity": 0.7494379782068189, "compression/movement_sparsity/model_sparsity": 0.7236924781145233, "compression_loss": 80.98356628417969, "distillation_loss": 4.459933757781982, "epoch": 3.13, "learning_rate": 3.816098431483047e-05, "loss": 85.0449, "step": 3704, "task_loss": 2.2515008449554443 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7581615389855635, "compression/movement_sparsity/importance_threshold": -0.0016937724501231207, "compression/movement_sparsity/linear_layer_sparsity": 0.7498044794232748, "compression/movement_sparsity/model_sparsity": 0.7240463888867044, "compression_loss": 81.01819610595703, "distillation_loss": 2.7761459350585938, "epoch": 3.13, "learning_rate": 3.8156288156288156e-05, "loss": 84.6429, "step": 3705, "task_loss": 2.0169272422790527 }, { "compression/movement_sparsity/importance_regularization_factor": 0.758489233332584, "compression/movement_sparsity/importance_threshold": -0.0016914773658147095, "compression/movement_sparsity/linear_layer_sparsity": 0.7501835606365864, "compression/movement_sparsity/model_sparsity": 0.7244124474941486, "compression_loss": 81.052734375, "distillation_loss": 4.765417098999023, "epoch": 3.13, "learning_rate": 3.815159199774584e-05, "loss": 84.7367, "step": 3706, "task_loss": 2.3753676414489746 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7588166315256731, "compression/movement_sparsity/importance_threshold": -0.0016891843556899, "compression/movement_sparsity/linear_layer_sparsity": 0.7505824478923414, "compression/movement_sparsity/model_sparsity": 0.7247976317455476, "compression_loss": 81.08727264404297, "distillation_loss": 4.686157703399658, "epoch": 3.13, "learning_rate": 3.8146895839203536e-05, "loss": 84.6329, "step": 3707, "task_loss": 2.8983380794525146 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7591437336987167, "compression/movement_sparsity/importance_threshold": -0.0016868934188109912, "compression/movement_sparsity/linear_layer_sparsity": 0.7508614137941824, "compression/movement_sparsity/model_sparsity": 0.7250670143104608, "compression_loss": 81.1216812133789, "distillation_loss": 2.9957194328308105, "epoch": 3.13, "learning_rate": 3.814219968066122e-05, "loss": 84.8033, "step": 3708, "task_loss": 0.7069545388221741 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7594705399855991, "compression/movement_sparsity/importance_threshold": -0.0016846045542402918, "compression/movement_sparsity/linear_layer_sparsity": 0.7511791451650076, "compression/movement_sparsity/model_sparsity": 0.7253738306312422, "compression_loss": 81.15621185302734, "distillation_loss": 4.061777591705322, "epoch": 3.14, "learning_rate": 3.813750352211891e-05, "loss": 84.8296, "step": 3709, "task_loss": 2.2253663539886475 }, { "compression/movement_sparsity/importance_regularization_factor": 0.759797050520206, "compression/movement_sparsity/importance_threshold": -0.0016823177610401025, "compression/movement_sparsity/linear_layer_sparsity": 0.7514032598957237, "compression/movement_sparsity/model_sparsity": 0.7255902463315006, "compression_loss": 81.19059753417969, "distillation_loss": 4.129062652587891, "epoch": 3.14, "learning_rate": 3.8132807363576595e-05, "loss": 84.6115, "step": 3710, "task_loss": 1.870001196861267 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7601232654364226, "compression/movement_sparsity/importance_threshold": -0.0016800330382727262, "compression/movement_sparsity/linear_layer_sparsity": 0.7517578488687113, "compression/movement_sparsity/model_sparsity": 0.7259326540824228, "compression_loss": 81.22498321533203, "distillation_loss": 3.150096893310547, "epoch": 3.14, "learning_rate": 3.812811120503428e-05, "loss": 85.1965, "step": 3711, "task_loss": 3.007744312286377 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7604491848681336, "compression/movement_sparsity/importance_threshold": -0.0016777503850004703, "compression/movement_sparsity/linear_layer_sparsity": 0.7520319974068275, "compression/movement_sparsity/model_sparsity": 0.726197384774875, "compression_loss": 81.25930786132812, "distillation_loss": 5.543707847595215, "epoch": 3.14, "learning_rate": 3.8123415046491974e-05, "loss": 84.9724, "step": 3712, "task_loss": 2.6793017387390137 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7607748089492248, "compression/movement_sparsity/importance_threshold": -0.0016754698002856344, "compression/movement_sparsity/linear_layer_sparsity": 0.7523608182535539, "compression/movement_sparsity/model_sparsity": 0.7265149096139454, "compression_loss": 81.29362487792969, "distillation_loss": 3.457920551300049, "epoch": 3.14, "learning_rate": 3.811871888794966e-05, "loss": 84.4721, "step": 3713, "task_loss": 2.228994846343994 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7611001378135805, "compression/movement_sparsity/importance_threshold": -0.0016731912831905284, "compression/movement_sparsity/linear_layer_sparsity": 0.7528234163369872, "compression/movement_sparsity/model_sparsity": 0.7269616160300946, "compression_loss": 81.32791900634766, "distillation_loss": 3.6463546752929688, "epoch": 3.14, "learning_rate": 3.811402272940735e-05, "loss": 84.6943, "step": 3714, "task_loss": 1.452162742614746 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7614251715950866, "compression/movement_sparsity/importance_threshold": -0.001670914832777451, "compression/movement_sparsity/linear_layer_sparsity": 0.7531967501014986, "compression/movement_sparsity/model_sparsity": 0.7273221246312859, "compression_loss": 81.36219024658203, "distillation_loss": 3.8088483810424805, "epoch": 3.14, "learning_rate": 3.810932657086503e-05, "loss": 84.9145, "step": 3715, "task_loss": 1.6999133825302124 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7617499104276276, "compression/movement_sparsity/importance_threshold": -0.0016686404481087097, "compression/movement_sparsity/linear_layer_sparsity": 0.7535338343963968, "compression/movement_sparsity/model_sparsity": 0.7276476290436618, "compression_loss": 81.39639282226562, "distillation_loss": 3.507528781890869, "epoch": 3.14, "learning_rate": 3.810463041232272e-05, "loss": 84.301, "step": 3716, "task_loss": 1.8312020301818848 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7620743544450893, "compression/movement_sparsity/importance_threshold": -0.0016663681282466057, "compression/movement_sparsity/linear_layer_sparsity": 0.753914895021536, "compression/movement_sparsity/model_sparsity": 0.7280155990640479, "compression_loss": 81.43057250976562, "distillation_loss": 4.118577480316162, "epoch": 3.14, "learning_rate": 3.809993425378041e-05, "loss": 84.9717, "step": 3717, "task_loss": 2.585632562637329 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7623985037813562, "compression/movement_sparsity/importance_threshold": -0.0016640978722534455, "compression/movement_sparsity/linear_layer_sparsity": 0.7543006537687238, "compression/movement_sparsity/model_sparsity": 0.728388105811537, "compression_loss": 81.46473693847656, "distillation_loss": 3.6276211738586426, "epoch": 3.14, "learning_rate": 3.809523809523809e-05, "loss": 85.3466, "step": 3718, "task_loss": 2.4423177242279053 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7627223585703138, "compression/movement_sparsity/importance_threshold": -0.001661829679191532, "compression/movement_sparsity/linear_layer_sparsity": 0.7546235721524706, "compression/movement_sparsity/model_sparsity": 0.728699930955389, "compression_loss": 81.49889373779297, "distillation_loss": 2.4993996620178223, "epoch": 3.14, "learning_rate": 3.8090541936695785e-05, "loss": 85.2204, "step": 3719, "task_loss": 1.887010097503662 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7630459189458474, "compression/movement_sparsity/importance_threshold": -0.0016595635481231677, "compression/movement_sparsity/linear_layer_sparsity": 0.755037471935279, "compression/movement_sparsity/model_sparsity": 0.7290996120073533, "compression_loss": 81.5329360961914, "distillation_loss": 3.0448527336120605, "epoch": 3.14, "learning_rate": 3.808584577815347e-05, "loss": 84.4637, "step": 3720, "task_loss": 1.684658169746399 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7633691850418416, "compression/movement_sparsity/importance_threshold": -0.0016572994781106597, "compression/movement_sparsity/linear_layer_sparsity": 0.7554243992508952, "compression/movement_sparsity/model_sparsity": 0.7294732471793504, "compression_loss": 81.5670166015625, "distillation_loss": 3.7425639629364014, "epoch": 3.15, "learning_rate": 3.8081149619611165e-05, "loss": 85.2015, "step": 3721, "task_loss": 2.3294520378112793 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7636921569921822, "compression/movement_sparsity/importance_threshold": -0.0016550374682163084, "compression/movement_sparsity/linear_layer_sparsity": 0.7556088780483896, "compression/movement_sparsity/model_sparsity": 0.7296513885626277, "compression_loss": 81.6010513305664, "distillation_loss": 2.205416202545166, "epoch": 3.15, "learning_rate": 3.8076453461068844e-05, "loss": 84.5668, "step": 3722, "task_loss": 2.1741702556610107 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7640148349307536, "compression/movement_sparsity/importance_threshold": -0.0016527775175024214, "compression/movement_sparsity/linear_layer_sparsity": 0.7560314228527341, "compression/movement_sparsity/model_sparsity": 0.730059417653043, "compression_loss": 81.63509368896484, "distillation_loss": 3.2796034812927246, "epoch": 3.15, "learning_rate": 3.807175730252653e-05, "loss": 84.711, "step": 3723, "task_loss": 1.651004672050476 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7643372189914417, "compression/movement_sparsity/importance_threshold": -0.0016505196250312989, "compression/movement_sparsity/linear_layer_sparsity": 0.7563347617532228, "compression/movement_sparsity/model_sparsity": 0.7303523359291204, "compression_loss": 81.66900634765625, "distillation_loss": 3.9813239574432373, "epoch": 3.15, "learning_rate": 3.8067061143984224e-05, "loss": 85.4953, "step": 3724, "task_loss": 2.0276739597320557 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7646593093081311, "compression/movement_sparsity/importance_threshold": -0.0016482637898652484, "compression/movement_sparsity/linear_layer_sparsity": 0.7566772834685629, "compression/movement_sparsity/model_sparsity": 0.7306830909698185, "compression_loss": 81.70294952392578, "distillation_loss": 2.271095037460327, "epoch": 3.15, "learning_rate": 3.806236498544191e-05, "loss": 85.3375, "step": 3725, "task_loss": 1.721961498260498 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7649811060147073, "compression/movement_sparsity/importance_threshold": -0.001646010011066571, "compression/movement_sparsity/linear_layer_sparsity": 0.7568961038688486, "compression/movement_sparsity/model_sparsity": 0.7308943942161842, "compression_loss": 81.73682403564453, "distillation_loss": 5.023978233337402, "epoch": 3.15, "learning_rate": 3.80576688268996e-05, "loss": 85.2046, "step": 3726, "task_loss": 3.194732427597046 }, { "compression/movement_sparsity/importance_regularization_factor": 0.765302609245055, "compression/movement_sparsity/importance_threshold": -0.0016437582876975736, "compression/movement_sparsity/linear_layer_sparsity": 0.7572975190481424, "compression/movement_sparsity/model_sparsity": 0.7312820195491716, "compression_loss": 81.77066040039062, "distillation_loss": 2.6423795223236084, "epoch": 3.15, "learning_rate": 3.805297266835728e-05, "loss": 84.39, "step": 3727, "task_loss": 0.879217267036438 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7656238191330595, "compression/movement_sparsity/importance_threshold": -0.0016415086188205598, "compression/movement_sparsity/linear_layer_sparsity": 0.7576272461316093, "compression/movement_sparsity/model_sparsity": 0.7316004194929623, "compression_loss": 81.80448150634766, "distillation_loss": 2.6968331336975098, "epoch": 3.15, "learning_rate": 3.8048276509814976e-05, "loss": 85.3542, "step": 3728, "task_loss": 1.652719497680664 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7659447358126066, "compression/movement_sparsity/importance_threshold": -0.0016392610034978283, "compression/movement_sparsity/linear_layer_sparsity": 0.7578896374404364, "compression/movement_sparsity/model_sparsity": 0.7318537968531211, "compression_loss": 81.8382568359375, "distillation_loss": 3.2174360752105713, "epoch": 3.15, "learning_rate": 3.804358035127266e-05, "loss": 85.4225, "step": 3729, "task_loss": 1.9126152992248535 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7662653594175803, "compression/movement_sparsity/importance_threshold": -0.0016370154407916908, "compression/movement_sparsity/linear_layer_sparsity": 0.7582424616366139, "compression/movement_sparsity/model_sparsity": 0.7321945004527458, "compression_loss": 81.87201690673828, "distillation_loss": 4.195473670959473, "epoch": 3.15, "learning_rate": 3.803888419273035e-05, "loss": 84.9587, "step": 3730, "task_loss": 3.317570924758911 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7665856900818668, "compression/movement_sparsity/importance_threshold": -0.0016347719297644444, "compression/movement_sparsity/linear_layer_sparsity": 0.7586298301464326, "compression/movement_sparsity/model_sparsity": 0.7325685616625672, "compression_loss": 81.90574645996094, "distillation_loss": 4.281726360321045, "epoch": 3.15, "learning_rate": 3.8034188034188035e-05, "loss": 85.6519, "step": 3731, "task_loss": 2.209256649017334 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7669057279393504, "compression/movement_sparsity/importance_threshold": -0.0016325304694783998, "compression/movement_sparsity/linear_layer_sparsity": 0.7589294725549542, "compression/movement_sparsity/model_sparsity": 0.7328579104325482, "compression_loss": 81.9394302368164, "distillation_loss": 4.009186267852783, "epoch": 3.15, "learning_rate": 3.802949187564572e-05, "loss": 85.679, "step": 3732, "task_loss": 1.9952187538146973 }, { "compression/movement_sparsity/importance_regularization_factor": 0.767225473123917, "compression/movement_sparsity/importance_threshold": -0.001630291058995854, "compression/movement_sparsity/linear_layer_sparsity": 0.7593051196079867, "compression/movement_sparsity/model_sparsity": 0.7332206528536837, "compression_loss": 81.97311401367188, "distillation_loss": 3.398517370223999, "epoch": 3.16, "learning_rate": 3.8024795717103414e-05, "loss": 84.9304, "step": 3733, "task_loss": 1.4153525829315186 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7675449257694509, "compression/movement_sparsity/importance_threshold": -0.001628053697379118, "compression/movement_sparsity/linear_layer_sparsity": 0.759611880744587, "compression/movement_sparsity/model_sparsity": 0.733516875801534, "compression_loss": 82.00682067871094, "distillation_loss": 3.0639572143554688, "epoch": 3.16, "learning_rate": 3.80200995585611e-05, "loss": 85.4746, "step": 3734, "task_loss": 2.5014772415161133 }, { "compression/movement_sparsity/importance_regularization_factor": 0.767864086009838, "compression/movement_sparsity/importance_threshold": -0.0016258183836904893, "compression/movement_sparsity/linear_layer_sparsity": 0.7599700588920331, "compression/movement_sparsity/model_sparsity": 0.7338627494277304, "compression_loss": 82.04039001464844, "distillation_loss": 4.306668281555176, "epoch": 3.16, "learning_rate": 3.801540340001879e-05, "loss": 85.4426, "step": 3735, "task_loss": 2.2235805988311768 }, { "compression/movement_sparsity/importance_regularization_factor": 0.768182953978963, "compression/movement_sparsity/importance_threshold": -0.0016235851169922773, "compression/movement_sparsity/linear_layer_sparsity": 0.7603497482378943, "compression/movement_sparsity/model_sparsity": 0.7342293952765002, "compression_loss": 82.07396697998047, "distillation_loss": 2.787050724029541, "epoch": 3.16, "learning_rate": 3.8010707241476473e-05, "loss": 85.4374, "step": 3736, "task_loss": 1.996794581413269 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7685015298107113, "compression/movement_sparsity/importance_threshold": -0.0016213538963467805, "compression/movement_sparsity/linear_layer_sparsity": 0.7607166310277145, "compression/movement_sparsity/model_sparsity": 0.7345836745138267, "compression_loss": 82.10758972167969, "distillation_loss": 2.326565980911255, "epoch": 3.16, "learning_rate": 3.800601108293416e-05, "loss": 85.4147, "step": 3737, "task_loss": 1.564960241317749 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7688198136389677, "compression/movement_sparsity/importance_threshold": -0.001619124720816309, "compression/movement_sparsity/linear_layer_sparsity": 0.7610723885691304, "compression/movement_sparsity/model_sparsity": 0.7349272106892568, "compression_loss": 82.14110565185547, "distillation_loss": 3.0554215908050537, "epoch": 3.16, "learning_rate": 3.800131492439185e-05, "loss": 85.5256, "step": 3738, "task_loss": 2.169434070587158 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7691378055976175, "compression/movement_sparsity/importance_threshold": -0.0016168975894631631, "compression/movement_sparsity/linear_layer_sparsity": 0.7615073464319838, "compression/movement_sparsity/model_sparsity": 0.7353472264114342, "compression_loss": 82.17452239990234, "distillation_loss": 3.0137085914611816, "epoch": 3.16, "learning_rate": 3.799661876584953e-05, "loss": 85.9602, "step": 3739, "task_loss": 2.209989547729492 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7694555058205462, "compression/movement_sparsity/importance_threshold": -0.0016146725013496451, "compression/movement_sparsity/linear_layer_sparsity": 0.7618246604569417, "compression/movement_sparsity/model_sparsity": 0.7356536397234628, "compression_loss": 82.20801544189453, "distillation_loss": 4.140649795532227, "epoch": 3.16, "learning_rate": 3.7991922607307226e-05, "loss": 86.2876, "step": 3740, "task_loss": 3.157860040664673 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7697729144416383, "compression/movement_sparsity/importance_threshold": -0.001612449455538064, "compression/movement_sparsity/linear_layer_sparsity": 0.7621022312311694, "compression/movement_sparsity/model_sparsity": 0.7359216750876881, "compression_loss": 82.24140930175781, "distillation_loss": 3.3894877433776855, "epoch": 3.16, "learning_rate": 3.798722644876491e-05, "loss": 85.6449, "step": 3741, "task_loss": 1.4611133337020874 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7700900315947796, "compression/movement_sparsity/importance_threshold": -0.0016102284510907195, "compression/movement_sparsity/linear_layer_sparsity": 0.7623060629527368, "compression/movement_sparsity/model_sparsity": 0.73611850456256, "compression_loss": 82.2748031616211, "distillation_loss": 3.5760154724121094, "epoch": 3.16, "learning_rate": 3.79825302902226e-05, "loss": 86.1086, "step": 3742, "task_loss": 2.121083974838257 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7704068574138547, "compression/movement_sparsity/importance_threshold": -0.001608009487069917, "compression/movement_sparsity/linear_layer_sparsity": 0.7626130506485221, "compression/movement_sparsity/model_sparsity": 0.7364149462865904, "compression_loss": 82.30810546875, "distillation_loss": 4.571725845336914, "epoch": 3.16, "learning_rate": 3.797783413168029e-05, "loss": 86.5255, "step": 3743, "task_loss": 2.3491389751434326 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7707233920327491, "compression/movement_sparsity/importance_threshold": -0.0016057925625379606, "compression/movement_sparsity/linear_layer_sparsity": 0.7629625480019292, "compression/movement_sparsity/model_sparsity": 0.7367524373307284, "compression_loss": 82.3414077758789, "distillation_loss": 3.604790449142456, "epoch": 3.16, "learning_rate": 3.797313797313797e-05, "loss": 86.1671, "step": 3744, "task_loss": 2.0918924808502197 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7710396355853476, "compression/movement_sparsity/importance_threshold": -0.001603577676557156, "compression/movement_sparsity/linear_layer_sparsity": 0.7633271890482339, "compression/movement_sparsity/model_sparsity": 0.7371045518353254, "compression_loss": 82.37469482421875, "distillation_loss": 4.50681734085083, "epoch": 3.17, "learning_rate": 3.7968441814595664e-05, "loss": 85.9067, "step": 3745, "task_loss": 2.2871451377868652 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7713555882055358, "compression/movement_sparsity/importance_threshold": -0.0016013648281898024, "compression/movement_sparsity/linear_layer_sparsity": 0.7636456835657879, "compression/movement_sparsity/model_sparsity": 0.7374121050863977, "compression_loss": 82.4079360961914, "distillation_loss": 3.3302059173583984, "epoch": 3.17, "learning_rate": 3.796374565605335e-05, "loss": 85.8246, "step": 3746, "task_loss": 2.2445249557495117 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7716712500271983, "compression/movement_sparsity/importance_threshold": -0.0015991540164982092, "compression/movement_sparsity/linear_layer_sparsity": 0.7640252775183078, "compression/movement_sparsity/model_sparsity": 0.737778658818881, "compression_loss": 82.44114685058594, "distillation_loss": 2.938812494277954, "epoch": 3.17, "learning_rate": 3.795904949751104e-05, "loss": 85.9171, "step": 3747, "task_loss": 1.5680830478668213 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7719866211842208, "compression/movement_sparsity/importance_threshold": -0.001596945240544676, "compression/movement_sparsity/linear_layer_sparsity": 0.7642387201189899, "compression/movement_sparsity/model_sparsity": 0.7379847690096033, "compression_loss": 82.4743423461914, "distillation_loss": 2.9713926315307617, "epoch": 3.17, "learning_rate": 3.795435333896872e-05, "loss": 85.7899, "step": 3748, "task_loss": 1.6404054164886475 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7723017018104879, "compression/movement_sparsity/importance_threshold": -0.0015947384993915108, "compression/movement_sparsity/linear_layer_sparsity": 0.7644708836628601, "compression/movement_sparsity/model_sparsity": 0.738208957021523, "compression_loss": 82.50749969482422, "distillation_loss": 3.577488660812378, "epoch": 3.17, "learning_rate": 3.794965718042641e-05, "loss": 85.723, "step": 3749, "task_loss": 2.3458402156829834 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7726164920398849, "compression/movement_sparsity/importance_threshold": -0.001592533792101016, "compression/movement_sparsity/linear_layer_sparsity": 0.7648701167194765, "compression/movement_sparsity/model_sparsity": 0.7385944751944601, "compression_loss": 82.54058074951172, "distillation_loss": 3.8069372177124023, "epoch": 3.17, "learning_rate": 3.79449610218841e-05, "loss": 85.9233, "step": 3750, "task_loss": 1.3519659042358398 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7729309920062974, "compression/movement_sparsity/importance_threshold": -0.0015903311177354919, "compression/movement_sparsity/linear_layer_sparsity": 0.765081639529169, "compression/movement_sparsity/model_sparsity": 0.7387987315449195, "compression_loss": 82.57363891601562, "distillation_loss": 3.5107202529907227, "epoch": 3.17, "learning_rate": 3.794026486334179e-05, "loss": 86.1822, "step": 3751, "task_loss": 2.120448350906372 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7732452018436099, "compression/movement_sparsity/importance_threshold": -0.0015881304753572485, "compression/movement_sparsity/linear_layer_sparsity": 0.7652966441958112, "compression/movement_sparsity/model_sparsity": 0.7390063501398308, "compression_loss": 82.60675811767578, "distillation_loss": 3.8247909545898438, "epoch": 3.17, "learning_rate": 3.7935568704799475e-05, "loss": 86.0003, "step": 3752, "task_loss": 1.4965037107467651 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7735591216857081, "compression/movement_sparsity/importance_threshold": -0.0015859318640285853, "compression/movement_sparsity/linear_layer_sparsity": 0.7655352110177019, "compression/movement_sparsity/model_sparsity": 0.7392367214574722, "compression_loss": 82.63980865478516, "distillation_loss": 3.484588623046875, "epoch": 3.17, "learning_rate": 3.793087254625716e-05, "loss": 87.1008, "step": 3753, "task_loss": 1.8741004467010498 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7738727516664765, "compression/movement_sparsity/importance_threshold": -0.0015837352828118097, "compression/movement_sparsity/linear_layer_sparsity": 0.7657559154364741, "compression/movement_sparsity/model_sparsity": 0.7394498440004933, "compression_loss": 82.67288970947266, "distillation_loss": 4.289256572723389, "epoch": 3.17, "learning_rate": 3.792617638771485e-05, "loss": 86.6443, "step": 3754, "task_loss": 2.6934778690338135 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7741860919198009, "compression/movement_sparsity/importance_threshold": -0.0015815407307692222, "compression/movement_sparsity/linear_layer_sparsity": 0.7661891085225174, "compression/movement_sparsity/model_sparsity": 0.7398681555713732, "compression_loss": 82.7059097290039, "distillation_loss": 4.224939823150635, "epoch": 3.17, "learning_rate": 3.792148022917254e-05, "loss": 87.2242, "step": 3755, "task_loss": 2.5787277221679688 }, { "compression/movement_sparsity/importance_regularization_factor": 0.774499142579566, "compression/movement_sparsity/importance_threshold": -0.0015793482069631293, "compression/movement_sparsity/linear_layer_sparsity": 0.766576512804839, "compression/movement_sparsity/model_sparsity": 0.740242251324802, "compression_loss": 82.7389144897461, "distillation_loss": 4.364882469177246, "epoch": 3.17, "learning_rate": 3.791678407063022e-05, "loss": 86.2716, "step": 3756, "task_loss": 2.642099380493164 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7748119037796571, "compression/movement_sparsity/importance_threshold": -0.001577157710455834, "compression/movement_sparsity/linear_layer_sparsity": 0.7670453233796105, "compression/movement_sparsity/model_sparsity": 0.7406949568141002, "compression_loss": 82.77190399169922, "distillation_loss": 3.3080596923828125, "epoch": 3.18, "learning_rate": 3.7912087912087914e-05, "loss": 87.1048, "step": 3757, "task_loss": 1.4967310428619385 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7751243756539593, "compression/movement_sparsity/importance_threshold": -0.001574969240309641, "compression/movement_sparsity/linear_layer_sparsity": 0.7673615284569784, "compression/movement_sparsity/model_sparsity": 0.7410002992742999, "compression_loss": 82.80481719970703, "distillation_loss": 3.5971250534057617, "epoch": 3.18, "learning_rate": 3.79073917535456e-05, "loss": 86.7371, "step": 3758, "task_loss": 1.5292072296142578 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7754365583363576, "compression/movement_sparsity/importance_threshold": -0.001572782795586855, "compression/movement_sparsity/linear_layer_sparsity": 0.7675814697290219, "compression/movement_sparsity/model_sparsity": 0.7412126848870302, "compression_loss": 82.83771514892578, "distillation_loss": 4.425161361694336, "epoch": 3.18, "learning_rate": 3.790269559500329e-05, "loss": 86.6261, "step": 3759, "task_loss": 2.2625224590301514 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7757484519607377, "compression/movement_sparsity/importance_threshold": -0.001570598375349775, "compression/movement_sparsity/linear_layer_sparsity": 0.767750864454457, "compression/movement_sparsity/model_sparsity": 0.7413762603825274, "compression_loss": 82.8706283569336, "distillation_loss": 3.4967360496520996, "epoch": 3.18, "learning_rate": 3.789799943646098e-05, "loss": 86.4596, "step": 3760, "task_loss": 2.3827271461486816 }, { "compression/movement_sparsity/importance_regularization_factor": 0.776060056660984, "compression/movement_sparsity/importance_threshold": -0.0015684159786607125, "compression/movement_sparsity/linear_layer_sparsity": 0.768040633652176, "compression/movement_sparsity/model_sparsity": 0.7416560751168705, "compression_loss": 82.90339660644531, "distillation_loss": 3.4624850749969482, "epoch": 3.18, "learning_rate": 3.7893303277918666e-05, "loss": 86.6068, "step": 3761, "task_loss": 1.4559991359710693 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7763713725709822, "compression/movement_sparsity/importance_threshold": -0.0015662356045819652, "compression/movement_sparsity/linear_layer_sparsity": 0.7683057794437271, "compression/movement_sparsity/model_sparsity": 0.7419121123347979, "compression_loss": 82.93623352050781, "distillation_loss": 2.6669912338256836, "epoch": 3.18, "learning_rate": 3.788860711937635e-05, "loss": 86.1814, "step": 3762, "task_loss": 1.1519489288330078 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7766823998246171, "compression/movement_sparsity/importance_threshold": -0.0015640572521758407, "compression/movement_sparsity/linear_layer_sparsity": 0.7685119483021512, "compression/movement_sparsity/model_sparsity": 0.7421111986586856, "compression_loss": 82.96902465820312, "distillation_loss": 2.851030111312866, "epoch": 3.18, "learning_rate": 3.788391096083404e-05, "loss": 85.6626, "step": 3763, "task_loss": 2.0943679809570312 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7769931385557741, "compression/movement_sparsity/importance_threshold": -0.0015618809205046403, "compression/movement_sparsity/linear_layer_sparsity": 0.768793048629999, "compression/movement_sparsity/model_sparsity": 0.742382642325506, "compression_loss": 83.00176239013672, "distillation_loss": 3.6951303482055664, "epoch": 3.18, "learning_rate": 3.787921480229173e-05, "loss": 86.9678, "step": 3764, "task_loss": 2.1145670413970947 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7773035888983381, "compression/movement_sparsity/importance_threshold": -0.0015597066086306711, "compression/movement_sparsity/linear_layer_sparsity": 0.7690655277846461, "compression/movement_sparsity/model_sparsity": 0.7426457609829471, "compression_loss": 83.03450012207031, "distillation_loss": 5.087008476257324, "epoch": 3.18, "learning_rate": 3.787451864374941e-05, "loss": 86.9128, "step": 3765, "task_loss": 2.8062498569488525 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7776137509861945, "compression/movement_sparsity/importance_threshold": -0.0015575343156162347, "compression/movement_sparsity/linear_layer_sparsity": 0.7693148144332415, "compression/movement_sparsity/model_sparsity": 0.7428864838682677, "compression_loss": 83.06720733642578, "distillation_loss": 4.947211742401123, "epoch": 3.18, "learning_rate": 3.7869822485207104e-05, "loss": 87.0079, "step": 3766, "task_loss": 3.5668604373931885 }, { "compression/movement_sparsity/importance_regularization_factor": 0.777923624953228, "compression/movement_sparsity/importance_threshold": -0.0015553640405236374, "compression/movement_sparsity/linear_layer_sparsity": 0.7695673563796014, "compression/movement_sparsity/model_sparsity": 0.7431303502218604, "compression_loss": 83.09982299804688, "distillation_loss": 3.4829301834106445, "epoch": 3.18, "learning_rate": 3.786512632666479e-05, "loss": 86.6857, "step": 3767, "task_loss": 1.8114540576934814 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7782332109333243, "compression/movement_sparsity/importance_threshold": -0.001553195782415179, "compression/movement_sparsity/linear_layer_sparsity": 0.7699054184562457, "compression/movement_sparsity/model_sparsity": 0.7434567988261713, "compression_loss": 83.13243865966797, "distillation_loss": 3.691450834274292, "epoch": 3.19, "learning_rate": 3.786043016812248e-05, "loss": 87.2559, "step": 3768, "task_loss": 3.0252506732940674 }, { "compression/movement_sparsity/importance_regularization_factor": 0.778542509060368, "compression/movement_sparsity/importance_threshold": -0.001551029540353169, "compression/movement_sparsity/linear_layer_sparsity": 0.7703197713574244, "compression/movement_sparsity/model_sparsity": 0.7438569174304959, "compression_loss": 83.16503143310547, "distillation_loss": 3.975186824798584, "epoch": 3.19, "learning_rate": 3.785573400958016e-05, "loss": 86.9055, "step": 3769, "task_loss": 1.3500369787216187 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7788515194682446, "compression/movement_sparsity/importance_threshold": -0.0015488653133999076, "compression/movement_sparsity/linear_layer_sparsity": 0.7706234560587745, "compression/movement_sparsity/model_sparsity": 0.7441501696281112, "compression_loss": 83.19757080078125, "distillation_loss": 3.2845211029052734, "epoch": 3.19, "learning_rate": 3.785103785103785e-05, "loss": 86.6716, "step": 3770, "task_loss": 1.1226887702941895 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7791602422908394, "compression/movement_sparsity/importance_threshold": -0.0015467031006176982, "compression/movement_sparsity/linear_layer_sparsity": 0.7708980219427579, "compression/movement_sparsity/model_sparsity": 0.7444153033293164, "compression_loss": 83.23015594482422, "distillation_loss": 4.058924198150635, "epoch": 3.19, "learning_rate": 3.784634169249554e-05, "loss": 87.3424, "step": 3771, "task_loss": 3.3591055870056152 }, { "compression/movement_sparsity/importance_regularization_factor": 0.779468677662037, "compression/movement_sparsity/importance_threshold": -0.0015445429010688483, "compression/movement_sparsity/linear_layer_sparsity": 0.7712287387321386, "compression/movement_sparsity/model_sparsity": 0.7447346589795779, "compression_loss": 83.26264953613281, "distillation_loss": 2.798048496246338, "epoch": 3.19, "learning_rate": 3.784164553395323e-05, "loss": 86.5358, "step": 3772, "task_loss": 1.4800227880477905 }, { "compression/movement_sparsity/importance_regularization_factor": 0.779776825715723, "compression/movement_sparsity/importance_threshold": -0.0015423847138156576, "compression/movement_sparsity/linear_layer_sparsity": 0.7714439580337983, "compression/movement_sparsity/model_sparsity": 0.7449424848361336, "compression_loss": 83.29512023925781, "distillation_loss": 3.860445022583008, "epoch": 3.19, "learning_rate": 3.7836949375410915e-05, "loss": 87.6684, "step": 3773, "task_loss": 2.187854051589966 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7800846865857822, "compression/movement_sparsity/importance_threshold": -0.001540228537920435, "compression/movement_sparsity/linear_layer_sparsity": 0.77163324227085, "compression/movement_sparsity/model_sparsity": 0.7451252665773361, "compression_loss": 83.32754516601562, "distillation_loss": 3.4418082237243652, "epoch": 3.19, "learning_rate": 3.78322532168686e-05, "loss": 87.0282, "step": 3774, "task_loss": 2.210050344467163 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7803922604061002, "compression/movement_sparsity/importance_threshold": -0.0015380743724454794, "compression/movement_sparsity/linear_layer_sparsity": 0.7719419947434454, "compression/movement_sparsity/model_sparsity": 0.7454234124526642, "compression_loss": 83.35993957519531, "distillation_loss": 3.5467007160186768, "epoch": 3.19, "learning_rate": 3.782755705832629e-05, "loss": 87.4188, "step": 3775, "task_loss": 1.9753005504608154 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7806995473105616, "compression/movement_sparsity/importance_threshold": -0.0015359222164530988, "compression/movement_sparsity/linear_layer_sparsity": 0.772313086764441, "compression/movement_sparsity/model_sparsity": 0.7457817563211261, "compression_loss": 83.39227294921875, "distillation_loss": 3.7774386405944824, "epoch": 3.19, "learning_rate": 3.782286089978398e-05, "loss": 86.5792, "step": 3776, "task_loss": 1.415449857711792 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7810065474330519, "compression/movement_sparsity/importance_threshold": -0.0015337720690055947, "compression/movement_sparsity/linear_layer_sparsity": 0.7727213941126283, "compression/movement_sparsity/model_sparsity": 0.7461760370558027, "compression_loss": 83.4245834350586, "distillation_loss": 4.428663730621338, "epoch": 3.19, "learning_rate": 3.781816474124167e-05, "loss": 87.4608, "step": 3777, "task_loss": 3.0786337852478027 }, { "compression/movement_sparsity/importance_regularization_factor": 0.781313260907456, "compression/movement_sparsity/importance_threshold": -0.0015316239291652727, "compression/movement_sparsity/linear_layer_sparsity": 0.7731110878351359, "compression/movement_sparsity/model_sparsity": 0.7465523436001041, "compression_loss": 83.45686340332031, "distillation_loss": 5.7508673667907715, "epoch": 3.19, "learning_rate": 3.7813468582699354e-05, "loss": 86.99, "step": 3778, "task_loss": 3.0719847679138184 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7816196878676595, "compression/movement_sparsity/importance_threshold": -0.001529477795994434, "compression/movement_sparsity/linear_layer_sparsity": 0.7734718654511266, "compression/movement_sparsity/model_sparsity": 0.7469007273951037, "compression_loss": 83.48909759521484, "distillation_loss": 3.214224100112915, "epoch": 3.19, "learning_rate": 3.780877242415704e-05, "loss": 86.8352, "step": 3779, "task_loss": 1.7268993854522705 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7819258284475468, "compression/movement_sparsity/importance_threshold": -0.0015273336685553878, "compression/movement_sparsity/linear_layer_sparsity": 0.7737915046887734, "compression/movement_sparsity/model_sparsity": 0.7472093860416122, "compression_loss": 83.52133178710938, "distillation_loss": 3.756901741027832, "epoch": 3.2, "learning_rate": 3.7804076265614727e-05, "loss": 87.2606, "step": 3780, "task_loss": 3.4098353385925293 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7822316827810035, "compression/movement_sparsity/importance_threshold": -0.0015251915459104336, "compression/movement_sparsity/linear_layer_sparsity": 0.7741004479480511, "compression/movement_sparsity/model_sparsity": 0.747507716149513, "compression_loss": 83.55345153808594, "distillation_loss": 4.934846878051758, "epoch": 3.2, "learning_rate": 3.779938010707242e-05, "loss": 87.5125, "step": 3781, "task_loss": 3.315051555633545 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7825372510019151, "compression/movement_sparsity/importance_threshold": -0.0015230514271218753, "compression/movement_sparsity/linear_layer_sparsity": 0.7744260611936836, "compression/movement_sparsity/model_sparsity": 0.7478221435784546, "compression_loss": 83.58560180664062, "distillation_loss": 2.9160609245300293, "epoch": 3.2, "learning_rate": 3.77946839485301e-05, "loss": 87.2105, "step": 3782, "task_loss": 1.9747040271759033 }, { "compression/movement_sparsity/importance_regularization_factor": 0.782842533244166, "compression/movement_sparsity/importance_threshold": -0.0015209133112520194, "compression/movement_sparsity/linear_layer_sparsity": 0.7747626446735411, "compression/movement_sparsity/model_sparsity": 0.748147164380327, "compression_loss": 83.61772918701172, "distillation_loss": 3.6073789596557617, "epoch": 3.2, "learning_rate": 3.778998778998779e-05, "loss": 87.1379, "step": 3783, "task_loss": 2.035456657409668 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7831475296416418, "compression/movement_sparsity/importance_threshold": -0.0015187771973631681, "compression/movement_sparsity/linear_layer_sparsity": 0.7751337843912073, "compression/movement_sparsity/model_sparsity": 0.7485055543069321, "compression_loss": 83.64981842041016, "distillation_loss": 4.648983955383301, "epoch": 3.2, "learning_rate": 3.778529163144548e-05, "loss": 87.2781, "step": 3784, "task_loss": 1.729897141456604 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7834522403282274, "compression/movement_sparsity/importance_threshold": -0.001516643084517628, "compression/movement_sparsity/linear_layer_sparsity": 0.7753683685411077, "compression/movement_sparsity/model_sparsity": 0.7487320797696181, "compression_loss": 83.68184661865234, "distillation_loss": 5.057370185852051, "epoch": 3.2, "learning_rate": 3.778059547290317e-05, "loss": 87.6817, "step": 3785, "task_loss": 2.5154778957366943 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7837566654378082, "compression/movement_sparsity/importance_threshold": -0.0015145109717776984, "compression/movement_sparsity/linear_layer_sparsity": 0.7756306048357555, "compression/movement_sparsity/model_sparsity": 0.7489853074408116, "compression_loss": 83.71393585205078, "distillation_loss": 3.712498903274536, "epoch": 3.2, "learning_rate": 3.777589931436085e-05, "loss": 87.3655, "step": 3786, "task_loss": 1.7156929969787598 }, { "compression/movement_sparsity/importance_regularization_factor": 0.784060805104269, "compression/movement_sparsity/importance_threshold": -0.0015123808582056877, "compression/movement_sparsity/linear_layer_sparsity": 0.7759069951173873, "compression/movement_sparsity/model_sparsity": 0.7492522028659933, "compression_loss": 83.74591827392578, "distillation_loss": 3.684849977493286, "epoch": 3.2, "learning_rate": 3.777120315581854e-05, "loss": 86.9913, "step": 3787, "task_loss": 2.582385540008545 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7843646594614955, "compression/movement_sparsity/importance_threshold": -0.0015102527428638964, "compression/movement_sparsity/linear_layer_sparsity": 0.7762349454998764, "compression/movement_sparsity/model_sparsity": 0.7495688871439505, "compression_loss": 83.77786254882812, "distillation_loss": 4.3996686935424805, "epoch": 3.2, "learning_rate": 3.776650699727623e-05, "loss": 87.319, "step": 3788, "task_loss": 1.9721534252166748 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7846682286433722, "compression/movement_sparsity/importance_threshold": -0.0015081266248146317, "compression/movement_sparsity/linear_layer_sparsity": 0.7764971698703567, "compression/movement_sparsity/model_sparsity": 0.7498221033006083, "compression_loss": 83.809814453125, "distillation_loss": 4.153657913208008, "epoch": 3.2, "learning_rate": 3.776181083873392e-05, "loss": 86.8527, "step": 3789, "task_loss": 2.619418144226074 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7849715127837844, "compression/movement_sparsity/importance_threshold": -0.0015060025031201968, "compression/movement_sparsity/linear_layer_sparsity": 0.7766971262374425, "compression/movement_sparsity/model_sparsity": 0.7500151905513469, "compression_loss": 83.84172821044922, "distillation_loss": 3.0294246673583984, "epoch": 3.2, "learning_rate": 3.775711468019161e-05, "loss": 87.1422, "step": 3790, "task_loss": 0.7556239366531372 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7852745120166177, "compression/movement_sparsity/importance_threshold": -0.0015038803768428937, "compression/movement_sparsity/linear_layer_sparsity": 0.7770216543838201, "compression/movement_sparsity/model_sparsity": 0.7503285701575312, "compression_loss": 83.87360382080078, "distillation_loss": 2.9226419925689697, "epoch": 3.2, "learning_rate": 3.775241852164929e-05, "loss": 87.0064, "step": 3791, "task_loss": 1.9306540489196777 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7855772264757566, "compression/movement_sparsity/importance_threshold": -0.001501760245045029, "compression/movement_sparsity/linear_layer_sparsity": 0.7773739300682864, "compression/movement_sparsity/model_sparsity": 0.7506687440885093, "compression_loss": 83.90543365478516, "distillation_loss": 3.404066562652588, "epoch": 3.21, "learning_rate": 3.774772236310698e-05, "loss": 87.303, "step": 3792, "task_loss": 1.1482508182525635 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7858796562950868, "compression/movement_sparsity/importance_threshold": -0.001499642106788903, "compression/movement_sparsity/linear_layer_sparsity": 0.7776549469269608, "compression/movement_sparsity/model_sparsity": 0.7509401071535793, "compression_loss": 83.9372787475586, "distillation_loss": 2.883958101272583, "epoch": 3.21, "learning_rate": 3.774302620456467e-05, "loss": 87.1241, "step": 3793, "task_loss": 1.0248199701309204 }, { "compression/movement_sparsity/importance_regularization_factor": 0.786181801608493, "compression/movement_sparsity/importance_threshold": -0.0014975259611368233, "compression/movement_sparsity/linear_layer_sparsity": 0.7778897814843815, "compression/movement_sparsity/model_sparsity": 0.751166874421517, "compression_loss": 83.96902465820312, "distillation_loss": 2.95987606048584, "epoch": 3.21, "learning_rate": 3.7738330046022356e-05, "loss": 87.3736, "step": 3794, "task_loss": 1.4281424283981323 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7864836625498608, "compression/movement_sparsity/importance_threshold": -0.001495411807151091, "compression/movement_sparsity/linear_layer_sparsity": 0.7780700868232033, "compression/movement_sparsity/model_sparsity": 0.7513409857172662, "compression_loss": 84.00077819824219, "distillation_loss": 1.937713861465454, "epoch": 3.21, "learning_rate": 3.773363388748004e-05, "loss": 86.772, "step": 3795, "task_loss": 1.7503010034561157 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7867852392530748, "compression/movement_sparsity/importance_threshold": -0.0014932996438940145, "compression/movement_sparsity/linear_layer_sparsity": 0.7783556587139147, "compression/movement_sparsity/model_sparsity": 0.7516167473350096, "compression_loss": 84.03253936767578, "distillation_loss": 2.5877861976623535, "epoch": 3.21, "learning_rate": 3.772893772893773e-05, "loss": 87.2933, "step": 3796, "task_loss": 1.5247256755828857 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7870865318520205, "compression/movement_sparsity/importance_threshold": -0.0014911894704278932, "compression/movement_sparsity/linear_layer_sparsity": 0.7786095600153851, "compression/movement_sparsity/model_sparsity": 0.7518619263456828, "compression_loss": 84.06420135498047, "distillation_loss": 3.9439549446105957, "epoch": 3.21, "learning_rate": 3.772424157039542e-05, "loss": 87.6646, "step": 3797, "task_loss": 1.7856820821762085 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7873875404805828, "compression/movement_sparsity/importance_threshold": -0.0014890812858150337, "compression/movement_sparsity/linear_layer_sparsity": 0.7789716015931452, "compression/movement_sparsity/model_sparsity": 0.7522115306814766, "compression_loss": 84.09585571289062, "distillation_loss": 4.824485778808594, "epoch": 3.21, "learning_rate": 3.771954541185311e-05, "loss": 87.8309, "step": 3798, "task_loss": 1.8355602025985718 }, { "compression/movement_sparsity/importance_regularization_factor": 0.787688265272647, "compression/movement_sparsity/importance_threshold": -0.0014869750891177399, "compression/movement_sparsity/linear_layer_sparsity": 0.7791246125122485, "compression/movement_sparsity/model_sparsity": 0.7523592852047922, "compression_loss": 84.12753295898438, "distillation_loss": 4.292254447937012, "epoch": 3.21, "learning_rate": 3.7714849253310794e-05, "loss": 88.0214, "step": 3799, "task_loss": 1.7683204412460327 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7879887063620985, "compression/movement_sparsity/importance_threshold": -0.001484870879398312, "compression/movement_sparsity/linear_layer_sparsity": 0.7794336273165319, "compression/movement_sparsity/model_sparsity": 0.7526576843999077, "compression_loss": 84.15913391113281, "distillation_loss": 4.301012992858887, "epoch": 3.21, "learning_rate": 3.771015309476848e-05, "loss": 88.1024, "step": 3800, "task_loss": 2.6688618659973145 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7882888638828219, "compression/movement_sparsity/importance_threshold": -0.0014827686557190595, "compression/movement_sparsity/linear_layer_sparsity": 0.779741998215763, "compression/movement_sparsity/model_sparsity": 0.7529554618100903, "compression_loss": 84.19073486328125, "distillation_loss": 2.6608834266662598, "epoch": 3.21, "learning_rate": 3.770545693622617e-05, "loss": 87.1059, "step": 3801, "task_loss": 1.426790475845337 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7885887379687029, "compression/movement_sparsity/importance_threshold": -0.0014806684171422817, "compression/movement_sparsity/linear_layer_sparsity": 0.7800697101148993, "compression/movement_sparsity/model_sparsity": 0.7532719157973317, "compression_loss": 84.22232055664062, "distillation_loss": 2.390174627304077, "epoch": 3.21, "learning_rate": 3.770076077768386e-05, "loss": 87.7149, "step": 3802, "task_loss": 2.257765531539917 }, { "compression/movement_sparsity/importance_regularization_factor": 0.788888328753626, "compression/movement_sparsity/importance_threshold": -0.001478570162730287, "compression/movement_sparsity/linear_layer_sparsity": 0.7803571421757618, "compression/movement_sparsity/model_sparsity": 0.7535494736826591, "compression_loss": 84.25386810302734, "distillation_loss": 3.9160256385803223, "epoch": 3.21, "learning_rate": 3.7696064619141546e-05, "loss": 87.5584, "step": 3803, "task_loss": 1.6712257862091064 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7891876363714769, "compression/movement_sparsity/importance_threshold": -0.0014764738915453756, "compression/movement_sparsity/linear_layer_sparsity": 0.7806464821034461, "compression/movement_sparsity/model_sparsity": 0.7538288738937137, "compression_loss": 84.28531646728516, "distillation_loss": 4.057296276092529, "epoch": 3.22, "learning_rate": 3.769136846059923e-05, "loss": 88.2353, "step": 3804, "task_loss": 2.9025015830993652 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7894866609561405, "compression/movement_sparsity/importance_threshold": -0.0014743796026498541, "compression/movement_sparsity/linear_layer_sparsity": 0.7810882725143548, "compression/movement_sparsity/model_sparsity": 0.7542554874449013, "compression_loss": 84.31680297851562, "distillation_loss": 3.3613243103027344, "epoch": 3.22, "learning_rate": 3.768667230205692e-05, "loss": 87.9363, "step": 3805, "task_loss": 1.6264674663543701 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7897854026415021, "compression/movement_sparsity/importance_threshold": -0.001472287295106024, "compression/movement_sparsity/linear_layer_sparsity": 0.7813264696870488, "compression/movement_sparsity/model_sparsity": 0.7544855018119331, "compression_loss": 84.34823608398438, "distillation_loss": 3.5399203300476074, "epoch": 3.22, "learning_rate": 3.7681976143514605e-05, "loss": 87.6794, "step": 3806, "task_loss": 1.5807515382766724 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7900838615614465, "compression/movement_sparsity/importance_threshold": -0.0014701969679761914, "compression/movement_sparsity/linear_layer_sparsity": 0.7816431278827868, "compression/movement_sparsity/model_sparsity": 0.754791281824493, "compression_loss": 84.37964630126953, "distillation_loss": 5.519688606262207, "epoch": 3.22, "learning_rate": 3.76772799849723e-05, "loss": 87.9776, "step": 3807, "task_loss": 2.925546407699585 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7903820378498592, "compression/movement_sparsity/importance_threshold": -0.0014681086203226588, "compression/movement_sparsity/linear_layer_sparsity": 0.7820526872685758, "compression/movement_sparsity/model_sparsity": 0.755186771585428, "compression_loss": 84.41105651855469, "distillation_loss": 2.308300018310547, "epoch": 3.22, "learning_rate": 3.767258382642998e-05, "loss": 88.5638, "step": 3808, "task_loss": 1.2355440855026245 }, { "compression/movement_sparsity/importance_regularization_factor": 0.790679931640625, "compression/movement_sparsity/importance_threshold": -0.0014660222512077326, "compression/movement_sparsity/linear_layer_sparsity": 0.7822079876278653, "compression/movement_sparsity/model_sparsity": 0.7553367368996161, "compression_loss": 84.4424057006836, "distillation_loss": 4.087986946105957, "epoch": 3.22, "learning_rate": 3.766788766788767e-05, "loss": 88.5734, "step": 3809, "task_loss": 2.480222463607788 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7909775430676292, "compression/movement_sparsity/importance_threshold": -0.0014639378596937149, "compression/movement_sparsity/linear_layer_sparsity": 0.7824988538490069, "compression/movement_sparsity/model_sparsity": 0.7556176109712524, "compression_loss": 84.47374725341797, "distillation_loss": 4.287975311279297, "epoch": 3.22, "learning_rate": 3.766319150934536e-05, "loss": 88.2891, "step": 3810, "task_loss": 2.5096845626831055 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7912748722647573, "compression/movement_sparsity/importance_threshold": -0.001461855444842908, "compression/movement_sparsity/linear_layer_sparsity": 0.7827470792466826, "compression/movement_sparsity/model_sparsity": 0.7558573090628873, "compression_loss": 84.50497436523438, "distillation_loss": 4.653452396392822, "epoch": 3.22, "learning_rate": 3.7658495350803044e-05, "loss": 89.0286, "step": 3811, "task_loss": 2.8024399280548096 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7915719193658939, "compression/movement_sparsity/importance_threshold": -0.001459775005717619, "compression/movement_sparsity/linear_layer_sparsity": 0.7829279569455511, "compression/movement_sparsity/model_sparsity": 0.7560319730563547, "compression_loss": 84.5362777709961, "distillation_loss": 2.995875597000122, "epoch": 3.22, "learning_rate": 3.765379919226073e-05, "loss": 87.8015, "step": 3812, "task_loss": 0.972717821598053 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7918686845049245, "compression/movement_sparsity/importance_threshold": -0.0014576965413801496, "compression/movement_sparsity/linear_layer_sparsity": 0.7831754668931686, "compression/movement_sparsity/model_sparsity": 0.7562709802758419, "compression_loss": 84.56742095947266, "distillation_loss": 4.405223846435547, "epoch": 3.22, "learning_rate": 3.7649103033718416e-05, "loss": 88.9009, "step": 3813, "task_loss": 2.338395357131958 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7921651678157339, "compression/movement_sparsity/importance_threshold": -0.001455620050892806, "compression/movement_sparsity/linear_layer_sparsity": 0.7835576960867362, "compression/movement_sparsity/model_sparsity": 0.756640078720736, "compression_loss": 84.5986099243164, "distillation_loss": 3.6552658081054688, "epoch": 3.22, "learning_rate": 3.764440687517611e-05, "loss": 88.4083, "step": 3814, "task_loss": 2.494311571121216 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7924613694322076, "compression/movement_sparsity/importance_threshold": -0.0014535455333178904, "compression/movement_sparsity/linear_layer_sparsity": 0.7838047529159836, "compression/movement_sparsity/model_sparsity": 0.7568786483878631, "compression_loss": 84.62977600097656, "distillation_loss": 4.410017967224121, "epoch": 3.22, "learning_rate": 3.7639710716633796e-05, "loss": 88.7685, "step": 3815, "task_loss": 3.7206950187683105 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7927572894882304, "compression/movement_sparsity/importance_threshold": -0.0014514729877177076, "compression/movement_sparsity/linear_layer_sparsity": 0.7840937112703035, "compression/movement_sparsity/model_sparsity": 0.7571576801337722, "compression_loss": 84.6609115600586, "distillation_loss": 3.899064540863037, "epoch": 3.23, "learning_rate": 3.763501455809148e-05, "loss": 88.1543, "step": 3816, "task_loss": 2.4852349758148193 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7930529281176879, "compression/movement_sparsity/importance_threshold": -0.0014494024131545599, "compression/movement_sparsity/linear_layer_sparsity": 0.78443196413363, "compression/movement_sparsity/model_sparsity": 0.7574843129706559, "compression_loss": 84.69200897216797, "distillation_loss": 3.362546920776367, "epoch": 3.23, "learning_rate": 3.763031839954917e-05, "loss": 88.3906, "step": 3817, "task_loss": 2.102660655975342 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7933482854544647, "compression/movement_sparsity/importance_threshold": -0.0014473338086907553, "compression/movement_sparsity/linear_layer_sparsity": 0.7845679234930142, "compression/movement_sparsity/model_sparsity": 0.7576156017077853, "compression_loss": 84.72306060791016, "distillation_loss": 3.972957134246826, "epoch": 3.23, "learning_rate": 3.7625622241006855e-05, "loss": 88.4042, "step": 3818, "task_loss": 2.2127726078033447 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7936433616324464, "compression/movement_sparsity/importance_threshold": -0.0014452671733885926, "compression/movement_sparsity/linear_layer_sparsity": 0.784830064394321, "compression/movement_sparsity/model_sparsity": 0.7578687372626924, "compression_loss": 84.75406646728516, "distillation_loss": 4.148760795593262, "epoch": 3.23, "learning_rate": 3.762092608246455e-05, "loss": 88.2685, "step": 3819, "task_loss": 2.0772180557250977 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7939381567855177, "compression/movement_sparsity/importance_threshold": -0.00144320250631038, "compression/movement_sparsity/linear_layer_sparsity": 0.7850356251201956, "compression/movement_sparsity/model_sparsity": 0.7580672363452545, "compression_loss": 84.78510284423828, "distillation_loss": 3.228693962097168, "epoch": 3.23, "learning_rate": 3.7616229923922234e-05, "loss": 87.7965, "step": 3820, "task_loss": 3.539278745651245 }, { "compression/movement_sparsity/importance_regularization_factor": 0.794232671047564, "compression/movement_sparsity/importance_threshold": -0.0014411398065184208, "compression/movement_sparsity/linear_layer_sparsity": 0.7852586666758244, "compression/movement_sparsity/model_sparsity": 0.7582826157372915, "compression_loss": 84.81608581542969, "distillation_loss": 4.823392391204834, "epoch": 3.23, "learning_rate": 3.761153376537992e-05, "loss": 88.3342, "step": 3821, "task_loss": 2.860661506652832 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7945269045524707, "compression/movement_sparsity/importance_threshold": -0.0014390790730750159, "compression/movement_sparsity/linear_layer_sparsity": 0.7854252472976975, "compression/movement_sparsity/model_sparsity": 0.7584434738023412, "compression_loss": 84.84703826904297, "distillation_loss": 2.765225410461426, "epoch": 3.23, "learning_rate": 3.760683760683761e-05, "loss": 87.8674, "step": 3822, "task_loss": 2.1998605728149414 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7948208574341225, "compression/movement_sparsity/importance_threshold": -0.001437020305042472, "compression/movement_sparsity/linear_layer_sparsity": 0.7856843594604247, "compression/movement_sparsity/model_sparsity": 0.7586936846651565, "compression_loss": 84.87796020507812, "distillation_loss": 4.972770690917969, "epoch": 3.23, "learning_rate": 3.76021414482953e-05, "loss": 88.9424, "step": 3823, "task_loss": 2.1536340713500977 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7951145298264047, "compression/movement_sparsity/importance_threshold": -0.001434963501483092, "compression/movement_sparsity/linear_layer_sparsity": 0.785997309240028, "compression/movement_sparsity/model_sparsity": 0.7589958836570843, "compression_loss": 84.90882873535156, "distillation_loss": 3.387336254119873, "epoch": 3.23, "learning_rate": 3.7597445289752987e-05, "loss": 88.0395, "step": 3824, "task_loss": 1.59870183467865 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7954079218632024, "compression/movement_sparsity/importance_threshold": -0.001432908661459181, "compression/movement_sparsity/linear_layer_sparsity": 0.7862229144916985, "compression/movement_sparsity/model_sparsity": 0.759213738674317, "compression_loss": 84.9397201538086, "distillation_loss": 3.674741744995117, "epoch": 3.23, "learning_rate": 3.7592749131210666e-05, "loss": 87.8829, "step": 3825, "task_loss": 1.4453003406524658 }, { "compression/movement_sparsity/importance_regularization_factor": 0.795701033678401, "compression/movement_sparsity/importance_threshold": -0.0014308557840330416, "compression/movement_sparsity/linear_layer_sparsity": 0.7864662986773141, "compression/movement_sparsity/model_sparsity": 0.7594487618644193, "compression_loss": 84.97051239013672, "distillation_loss": 2.7658677101135254, "epoch": 3.23, "learning_rate": 3.758805297266836e-05, "loss": 88.4673, "step": 3826, "task_loss": 1.799147367477417 }, { "compression/movement_sparsity/importance_regularization_factor": 0.795993865405885, "compression/movement_sparsity/importance_threshold": -0.0014288048682669805, "compression/movement_sparsity/linear_layer_sparsity": 0.7868150328839926, "compression/movement_sparsity/model_sparsity": 0.7597855159782665, "compression_loss": 85.00128173828125, "distillation_loss": 3.159055233001709, "epoch": 3.23, "learning_rate": 3.7583356814126046e-05, "loss": 88.4391, "step": 3827, "task_loss": 1.5958257913589478 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7962864171795404, "compression/movement_sparsity/importance_threshold": -0.0014267559132232974, "compression/movement_sparsity/linear_layer_sparsity": 0.7870434403150575, "compression/movement_sparsity/model_sparsity": 0.760006076911411, "compression_loss": 85.03201293945312, "distillation_loss": 5.409295082092285, "epoch": 3.24, "learning_rate": 3.757866065558374e-05, "loss": 88.9511, "step": 3828, "task_loss": 3.3699069023132324 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7965786891332516, "compression/movement_sparsity/importance_threshold": -0.0014247089179643003, "compression/movement_sparsity/linear_layer_sparsity": 0.7873449428937302, "compression/movement_sparsity/model_sparsity": 0.7602972219489759, "compression_loss": 85.06273651123047, "distillation_loss": 3.787811040878296, "epoch": 3.24, "learning_rate": 3.757396449704142e-05, "loss": 89.4122, "step": 3829, "task_loss": 1.6658166646957397 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7968706814009041, "compression/movement_sparsity/importance_threshold": -0.0014226638815522906, "compression/movement_sparsity/linear_layer_sparsity": 0.7876464931690736, "compression/movement_sparsity/model_sparsity": 0.760588413044684, "compression_loss": 85.09345245361328, "distillation_loss": 2.128357410430908, "epoch": 3.24, "learning_rate": 3.756926833849911e-05, "loss": 88.0226, "step": 3830, "task_loss": 1.1244258880615234 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7971623941163832, "compression/movement_sparsity/importance_threshold": -0.0014206208030495722, "compression/movement_sparsity/linear_layer_sparsity": 0.787970997467116, "compression/movement_sparsity/model_sparsity": 0.7609017696217967, "compression_loss": 85.12411499023438, "distillation_loss": 3.292328119277954, "epoch": 3.24, "learning_rate": 3.75645721799568e-05, "loss": 88.6085, "step": 3831, "task_loss": 1.8629060983657837 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7974538274135735, "compression/movement_sparsity/importance_threshold": -0.0014185796815184525, "compression/movement_sparsity/linear_layer_sparsity": 0.7882475546870946, "compression/movement_sparsity/model_sparsity": 0.7611688262504794, "compression_loss": 85.15473937988281, "distillation_loss": 5.397208213806152, "epoch": 3.24, "learning_rate": 3.7559876021414484e-05, "loss": 89.1523, "step": 3832, "task_loss": 2.1503803730010986 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7977449814263609, "compression/movement_sparsity/importance_threshold": -0.00141654051602123, "compression/movement_sparsity/linear_layer_sparsity": 0.7885250062196458, "compression/movement_sparsity/model_sparsity": 0.7614367464693468, "compression_loss": 85.1853256225586, "distillation_loss": 2.5712857246398926, "epoch": 3.24, "learning_rate": 3.755517986287217e-05, "loss": 88.8309, "step": 3833, "task_loss": 1.2182261943817139 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7980358562886297, "compression/movement_sparsity/importance_threshold": -0.0014145033056202141, "compression/movement_sparsity/linear_layer_sparsity": 0.7887556911667293, "compression/movement_sparsity/model_sparsity": 0.761659506678828, "compression_loss": 85.21598052978516, "distillation_loss": 6.438163757324219, "epoch": 3.24, "learning_rate": 3.755048370432986e-05, "loss": 89.3204, "step": 3834, "task_loss": 4.295038223266602 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7983264521342657, "compression/movement_sparsity/importance_threshold": -0.001412468049377705, "compression/movement_sparsity/linear_layer_sparsity": 0.7889657473038026, "compression/movement_sparsity/model_sparsity": 0.7618623467413846, "compression_loss": 85.24652862548828, "distillation_loss": 4.066577434539795, "epoch": 3.24, "learning_rate": 3.754578754578755e-05, "loss": 88.9075, "step": 3835, "task_loss": 2.2365853786468506 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7986167690971536, "compression/movement_sparsity/importance_threshold": -0.0014104347463560086, "compression/movement_sparsity/linear_layer_sparsity": 0.7891614467430423, "compression/movement_sparsity/model_sparsity": 0.7620513233028446, "compression_loss": 85.27703857421875, "distillation_loss": 3.2785258293151855, "epoch": 3.24, "learning_rate": 3.7541091387245236e-05, "loss": 88.6209, "step": 3836, "task_loss": 1.4205782413482666 }, { "compression/movement_sparsity/importance_regularization_factor": 0.798906807311179, "compression/movement_sparsity/importance_threshold": -0.0014084033956174268, "compression/movement_sparsity/linear_layer_sparsity": 0.7894293827898202, "compression/movement_sparsity/model_sparsity": 0.762310054922148, "compression_loss": 85.30764770507812, "distillation_loss": 4.5068817138671875, "epoch": 3.24, "learning_rate": 3.753639522870292e-05, "loss": 88.8727, "step": 3837, "task_loss": 1.9963982105255127 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7991965669102266, "compression/movement_sparsity/importance_threshold": -0.001406373996224267, "compression/movement_sparsity/linear_layer_sparsity": 0.7897739316136585, "compression/movement_sparsity/model_sparsity": 0.7626427674339312, "compression_loss": 85.33812713623047, "distillation_loss": 4.565608024597168, "epoch": 3.24, "learning_rate": 3.753169907016061e-05, "loss": 89.3987, "step": 3838, "task_loss": 2.062997341156006 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7994860480281816, "compression/movement_sparsity/importance_threshold": -0.0014043465472388314, "compression/movement_sparsity/linear_layer_sparsity": 0.7900248161007171, "compression/movement_sparsity/model_sparsity": 0.7628850332670484, "compression_loss": 85.36865997314453, "distillation_loss": 4.000123500823975, "epoch": 3.24, "learning_rate": 3.7527002911618295e-05, "loss": 88.9871, "step": 3839, "task_loss": 1.7384661436080933 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7997752507989294, "compression/movement_sparsity/importance_threshold": -0.0014023210477234222, "compression/movement_sparsity/linear_layer_sparsity": 0.7902616777666358, "compression/movement_sparsity/model_sparsity": 0.7631137580060712, "compression_loss": 85.39913177490234, "distillation_loss": 4.044249534606934, "epoch": 3.25, "learning_rate": 3.752230675307599e-05, "loss": 88.7668, "step": 3840, "task_loss": 2.224552869796753 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8000641753563549, "compression/movement_sparsity/importance_threshold": -0.0014002974967403468, "compression/movement_sparsity/linear_layer_sparsity": 0.7905169384232168, "compression/movement_sparsity/model_sparsity": 0.7633602496738249, "compression_loss": 85.42953491210938, "distillation_loss": 6.171478748321533, "epoch": 3.25, "learning_rate": 3.7517610594533675e-05, "loss": 89.5598, "step": 3841, "task_loss": 3.6494827270507812 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8003528218343435, "compression/movement_sparsity/importance_threshold": -0.0013982758933519055, "compression/movement_sparsity/linear_layer_sparsity": 0.7907662727684827, "compression/movement_sparsity/model_sparsity": 0.7636010186172887, "compression_loss": 85.45999145507812, "distillation_loss": 3.000821590423584, "epoch": 3.25, "learning_rate": 3.751291443599136e-05, "loss": 89.1276, "step": 3842, "task_loss": 1.9036200046539307 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8006411903667799, "compression/movement_sparsity/importance_threshold": -0.0013962562366204058, "compression/movement_sparsity/linear_layer_sparsity": 0.7910967987711811, "compression/movement_sparsity/model_sparsity": 0.7639201900349776, "compression_loss": 85.49034881591797, "distillation_loss": 6.182601451873779, "epoch": 3.25, "learning_rate": 3.750821827744905e-05, "loss": 89.7253, "step": 3843, "task_loss": 3.2545530796051025 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8009292810875499, "compression/movement_sparsity/importance_threshold": -0.001394238525608148, "compression/movement_sparsity/linear_layer_sparsity": 0.7913165254082072, "compression/movement_sparsity/model_sparsity": 0.7641323683860636, "compression_loss": 85.52069091796875, "distillation_loss": 2.779271125793457, "epoch": 3.25, "learning_rate": 3.7503522118906734e-05, "loss": 89.4272, "step": 3844, "task_loss": 2.387385845184326 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8012170941305379, "compression/movement_sparsity/importance_threshold": -0.0013922227593774414, "compression/movement_sparsity/linear_layer_sparsity": 0.7914998594856085, "compression/movement_sparsity/model_sparsity": 0.7643094043739047, "compression_loss": 85.55097198486328, "distillation_loss": 3.1984975337982178, "epoch": 3.25, "learning_rate": 3.749882596036443e-05, "loss": 89.1108, "step": 3845, "task_loss": 1.8234485387802124 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8015046296296298, "compression/movement_sparsity/importance_threshold": -0.0013902089369905828, "compression/movement_sparsity/linear_layer_sparsity": 0.7917470236323647, "compression/movement_sparsity/model_sparsity": 0.764548077671854, "compression_loss": 85.58130645751953, "distillation_loss": 3.2152206897735596, "epoch": 3.25, "learning_rate": 3.7494129801822106e-05, "loss": 88.8376, "step": 3846, "task_loss": 1.5068246126174927 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8017918877187099, "compression/movement_sparsity/importance_threshold": -0.001388197057509883, "compression/movement_sparsity/linear_layer_sparsity": 0.7919225950766352, "compression/movement_sparsity/model_sparsity": 0.7647176176968927, "compression_loss": 85.61157989501953, "distillation_loss": 4.448546886444092, "epoch": 3.25, "learning_rate": 3.74894336432798e-05, "loss": 89.3822, "step": 3847, "task_loss": 3.531698703765869 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8020788685316641, "compression/movement_sparsity/importance_threshold": -0.0013861871199976408, "compression/movement_sparsity/linear_layer_sparsity": 0.7921442057321477, "compression/movement_sparsity/model_sparsity": 0.7649316153446343, "compression_loss": 85.64181518554688, "distillation_loss": 3.885610342025757, "epoch": 3.25, "learning_rate": 3.7484737484737486e-05, "loss": 88.9152, "step": 3848, "task_loss": 1.8488744497299194 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8023655722023769, "compression/movement_sparsity/importance_threshold": -0.0013841791235161653, "compression/movement_sparsity/linear_layer_sparsity": 0.7923867433018612, "compression/movement_sparsity/model_sparsity": 0.7651658210026953, "compression_loss": 85.67201232910156, "distillation_loss": 3.7141404151916504, "epoch": 3.25, "learning_rate": 3.748004132619517e-05, "loss": 89.6597, "step": 3849, "task_loss": 2.290447473526001 }, { "compression/movement_sparsity/importance_regularization_factor": 0.802651998864734, "compression/movement_sparsity/importance_threshold": -0.0013821730671277552, "compression/movement_sparsity/linear_layer_sparsity": 0.7926834881376472, "compression/movement_sparsity/model_sparsity": 0.7654523717404782, "compression_loss": 85.70210266113281, "distillation_loss": 3.9642534255981445, "epoch": 3.25, "learning_rate": 3.7475345167652865e-05, "loss": 88.8215, "step": 3850, "task_loss": 1.4042876958847046 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8029381486526203, "compression/movement_sparsity/importance_threshold": -0.0013801689498947178, "compression/movement_sparsity/linear_layer_sparsity": 0.7929777169740622, "compression/movement_sparsity/model_sparsity": 0.7657364929112085, "compression_loss": 85.73225402832031, "distillation_loss": 4.614724636077881, "epoch": 3.26, "learning_rate": 3.7470649009110545e-05, "loss": 89.2819, "step": 3851, "task_loss": 2.7875442504882812 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8032240216999207, "compression/movement_sparsity/importance_threshold": -0.0013781667708793579, "compression/movement_sparsity/linear_layer_sparsity": 0.7932184301461274, "compression/movement_sparsity/model_sparsity": 0.7659689368452929, "compression_loss": 85.76235961914062, "distillation_loss": 5.234553337097168, "epoch": 3.26, "learning_rate": 3.746595285056824e-05, "loss": 89.7429, "step": 3852, "task_loss": 2.6951162815093994 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8035096181405209, "compression/movement_sparsity/importance_threshold": -0.0013761665291439741, "compression/movement_sparsity/linear_layer_sparsity": 0.7933027220871453, "compression/movement_sparsity/model_sparsity": 0.7660503330988244, "compression_loss": 85.7923812866211, "distillation_loss": 3.2017905712127686, "epoch": 3.26, "learning_rate": 3.7461256692025924e-05, "loss": 89.4474, "step": 3853, "task_loss": 1.3360157012939453 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8037949381083055, "compression/movement_sparsity/importance_threshold": -0.0013741682237508766, "compression/movement_sparsity/linear_layer_sparsity": 0.7935495881297105, "compression/movement_sparsity/model_sparsity": 0.7662887185333789, "compression_loss": 85.82234954833984, "distillation_loss": 3.7438907623291016, "epoch": 3.26, "learning_rate": 3.745656053348362e-05, "loss": 90.5502, "step": 3854, "task_loss": 1.6186754703521729 }, { "compression/movement_sparsity/importance_regularization_factor": 0.80407998173716, "compression/movement_sparsity/importance_threshold": -0.0013721718537623656, "compression/movement_sparsity/linear_layer_sparsity": 0.7936648471340788, "compression/movement_sparsity/model_sparsity": 0.7664000180363689, "compression_loss": 85.85240173339844, "distillation_loss": 3.18158221244812, "epoch": 3.26, "learning_rate": 3.74518643749413e-05, "loss": 89.1389, "step": 3855, "task_loss": 2.566549777984619 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8043647491609693, "compression/movement_sparsity/importance_threshold": -0.001370177418240747, "compression/movement_sparsity/linear_layer_sparsity": 0.7939893395079535, "compression/movement_sparsity/model_sparsity": 0.7667133630989457, "compression_loss": 85.88236999511719, "distillation_loss": 2.4177823066711426, "epoch": 3.26, "learning_rate": 3.744716821639899e-05, "loss": 88.9918, "step": 3856, "task_loss": 2.462205171585083 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8046492405136187, "compression/movement_sparsity/importance_threshold": -0.0013681849162483243, "compression/movement_sparsity/linear_layer_sparsity": 0.7941820102086138, "compression/movement_sparsity/model_sparsity": 0.7668994149683139, "compression_loss": 85.91234588623047, "distillation_loss": 4.64054012298584, "epoch": 3.26, "learning_rate": 3.7442472057856676e-05, "loss": 89.561, "step": 3857, "task_loss": 3.1398696899414062 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8049334559289932, "compression/movement_sparsity/importance_threshold": -0.0013661943468474009, "compression/movement_sparsity/linear_layer_sparsity": 0.794403465849947, "compression/movement_sparsity/model_sparsity": 0.7671132629270901, "compression_loss": 85.94224548339844, "distillation_loss": 2.4275131225585938, "epoch": 3.26, "learning_rate": 3.743777589931436e-05, "loss": 89.0713, "step": 3858, "task_loss": 1.8161195516586304 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8052173955409783, "compression/movement_sparsity/importance_threshold": -0.0013642057091002805, "compression/movement_sparsity/linear_layer_sparsity": 0.7946538733703001, "compression/movement_sparsity/model_sparsity": 0.7673550681787754, "compression_loss": 85.97218322753906, "distillation_loss": 5.4072184562683105, "epoch": 3.26, "learning_rate": 3.743307974077205e-05, "loss": 89.9296, "step": 3859, "task_loss": 2.4943315982818604 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8055010594834585, "compression/movement_sparsity/importance_threshold": -0.0013622190020692688, "compression/movement_sparsity/linear_layer_sparsity": 0.7949197226877417, "compression/movement_sparsity/model_sparsity": 0.7676117847543148, "compression_loss": 86.00200653076172, "distillation_loss": 4.195233345031738, "epoch": 3.26, "learning_rate": 3.7428383582229735e-05, "loss": 90.225, "step": 3860, "task_loss": 2.5529863834381104 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8057844478903194, "compression/movement_sparsity/importance_threshold": -0.0013602342248166688, "compression/movement_sparsity/linear_layer_sparsity": 0.7952802737445474, "compression/movement_sparsity/model_sparsity": 0.7679599497731343, "compression_loss": 86.03182220458984, "distillation_loss": 3.866274356842041, "epoch": 3.26, "learning_rate": 3.742368742368743e-05, "loss": 89.8705, "step": 3861, "task_loss": 2.6183953285217285 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8060675608954462, "compression/movement_sparsity/importance_threshold": -0.0013582513764047836, "compression/movement_sparsity/linear_layer_sparsity": 0.795507119109652, "compression/movement_sparsity/model_sparsity": 0.7681790023020896, "compression_loss": 86.06163787841797, "distillation_loss": 3.0795345306396484, "epoch": 3.26, "learning_rate": 3.7418991265145115e-05, "loss": 88.9724, "step": 3862, "task_loss": 1.9453215599060059 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8063503986327237, "compression/movement_sparsity/importance_threshold": -0.0013562704558959188, "compression/movement_sparsity/linear_layer_sparsity": 0.7956969280100796, "compression/movement_sparsity/model_sparsity": 0.7683622906828671, "compression_loss": 86.0913314819336, "distillation_loss": 2.7485480308532715, "epoch": 3.27, "learning_rate": 3.74142951066028e-05, "loss": 90.1207, "step": 3863, "task_loss": 1.1412886381149292 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8066329612360374, "compression/movement_sparsity/importance_threshold": -0.0013542914623523757, "compression/movement_sparsity/linear_layer_sparsity": 0.7959748803576716, "compression/movement_sparsity/model_sparsity": 0.7686306945122379, "compression_loss": 86.12100219726562, "distillation_loss": 3.6474804878234863, "epoch": 3.27, "learning_rate": 3.740959894806049e-05, "loss": 89.8424, "step": 3864, "task_loss": 1.7248187065124512 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8069152488392721, "compression/movement_sparsity/importance_threshold": -0.0013523143948364624, "compression/movement_sparsity/linear_layer_sparsity": 0.7962524869044021, "compression/movement_sparsity/model_sparsity": 0.7688987644200705, "compression_loss": 86.15070343017578, "distillation_loss": 3.387557029724121, "epoch": 3.27, "learning_rate": 3.7404902789518174e-05, "loss": 89.7969, "step": 3865, "task_loss": 1.395418643951416 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8071972615763134, "compression/movement_sparsity/importance_threshold": -0.0013503392524104778, "compression/movement_sparsity/linear_layer_sparsity": 0.7964429754823851, "compression/movement_sparsity/model_sparsity": 0.7690827091293884, "compression_loss": 86.18028259277344, "distillation_loss": 3.523892879486084, "epoch": 3.27, "learning_rate": 3.740020663097587e-05, "loss": 89.5473, "step": 3866, "task_loss": 2.007884979248047 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8074789995810459, "compression/movement_sparsity/importance_threshold": -0.0013483660341367318, "compression/movement_sparsity/linear_layer_sparsity": 0.7966732550077689, "compression/movement_sparsity/model_sparsity": 0.7693050778446525, "compression_loss": 86.20990753173828, "distillation_loss": 3.096726894378662, "epoch": 3.27, "learning_rate": 3.739551047243355e-05, "loss": 89.5338, "step": 3867, "task_loss": 1.8810862302780151 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8077604629873552, "compression/movement_sparsity/importance_threshold": -0.0013463947390775222, "compression/movement_sparsity/linear_layer_sparsity": 0.7968820948797434, "compression/movement_sparsity/model_sparsity": 0.7695067434245582, "compression_loss": 86.239501953125, "distillation_loss": 4.6692609786987305, "epoch": 3.27, "learning_rate": 3.739081431389124e-05, "loss": 89.6781, "step": 3868, "task_loss": 1.598134160041809 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8080416519291259, "compression/movement_sparsity/importance_threshold": -0.001344425366295158, "compression/movement_sparsity/linear_layer_sparsity": 0.7971148307836601, "compression/movement_sparsity/model_sparsity": 0.769731484134196, "compression_loss": 86.26899719238281, "distillation_loss": 5.1784796714782715, "epoch": 3.27, "learning_rate": 3.7386118155348926e-05, "loss": 90.0477, "step": 3869, "task_loss": 2.346848964691162 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8083225665402436, "compression/movement_sparsity/importance_threshold": -0.0013424579148519416, "compression/movement_sparsity/linear_layer_sparsity": 0.7973260077924913, "compression/movement_sparsity/model_sparsity": 0.7699354065631173, "compression_loss": 86.29850769042969, "distillation_loss": 6.489777565002441, "epoch": 3.27, "learning_rate": 3.738142199680661e-05, "loss": 90.9212, "step": 3870, "task_loss": 2.9595510959625244 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8086032069545934, "compression/movement_sparsity/importance_threshold": -0.001340492383810175, "compression/movement_sparsity/linear_layer_sparsity": 0.7976084317029467, "compression/movement_sparsity/model_sparsity": 0.770208128343411, "compression_loss": 86.32801818847656, "distillation_loss": 3.1317710876464844, "epoch": 3.27, "learning_rate": 3.7376725838264305e-05, "loss": 90.1283, "step": 3871, "task_loss": 2.0601143836975098 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8088835733060602, "compression/movement_sparsity/importance_threshold": -0.0013385287722321657, "compression/movement_sparsity/linear_layer_sparsity": 0.7978264293356655, "compression/movement_sparsity/model_sparsity": 0.7704186370868068, "compression_loss": 86.35747528076172, "distillation_loss": 5.940177917480469, "epoch": 3.27, "learning_rate": 3.7372029679721985e-05, "loss": 90.7524, "step": 3872, "task_loss": 3.879319667816162 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8091636657285296, "compression/movement_sparsity/importance_threshold": -0.001336567079180213, "compression/movement_sparsity/linear_layer_sparsity": 0.7980464779252178, "compression/movement_sparsity/model_sparsity": 0.7706311263303592, "compression_loss": 86.38692474365234, "distillation_loss": 4.192668914794922, "epoch": 3.27, "learning_rate": 3.736733352117968e-05, "loss": 90.1243, "step": 3873, "task_loss": 2.8153023719787598 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8094434843558862, "compression/movement_sparsity/importance_threshold": -0.0013346073037166254, "compression/movement_sparsity/linear_layer_sparsity": 0.7982896355516482, "compression/movement_sparsity/model_sparsity": 0.7708659307442816, "compression_loss": 86.41630554199219, "distillation_loss": 4.960709571838379, "epoch": 3.27, "learning_rate": 3.7362637362637365e-05, "loss": 90.1977, "step": 3874, "task_loss": 2.4870309829711914 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8097230293220155, "compression/movement_sparsity/importance_threshold": -0.001332649444903705, "compression/movement_sparsity/linear_layer_sparsity": 0.7985713559362131, "compression/movement_sparsity/model_sparsity": 0.7711379731669633, "compression_loss": 86.44563293457031, "distillation_loss": 3.0128109455108643, "epoch": 3.28, "learning_rate": 3.735794120409505e-05, "loss": 89.6698, "step": 3875, "task_loss": 1.6253968477249146 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8100023007608024, "compression/movement_sparsity/importance_threshold": -0.0013306935018037556, "compression/movement_sparsity/linear_layer_sparsity": 0.7987788126047419, "compression/movement_sparsity/model_sparsity": 0.7713383030607167, "compression_loss": 86.47492980957031, "distillation_loss": 4.748160362243652, "epoch": 3.28, "learning_rate": 3.735324504555274e-05, "loss": 89.7852, "step": 3876, "task_loss": 3.0472424030303955 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8102812988061322, "compression/movement_sparsity/importance_threshold": -0.0013287394734790813, "compression/movement_sparsity/linear_layer_sparsity": 0.7989338506323433, "compression/movement_sparsity/model_sparsity": 0.7714880150551173, "compression_loss": 86.50424194335938, "distillation_loss": 4.278330326080322, "epoch": 3.28, "learning_rate": 3.7348548887010424e-05, "loss": 90.8193, "step": 3877, "task_loss": 2.4647650718688965 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8105600235918899, "compression/movement_sparsity/importance_threshold": -0.0013267873589919866, "compression/movement_sparsity/linear_layer_sparsity": 0.7992155590927407, "compression/movement_sparsity/model_sparsity": 0.7717600459632634, "compression_loss": 86.53349304199219, "distillation_loss": 3.979754686355591, "epoch": 3.28, "learning_rate": 3.734385272846812e-05, "loss": 90.3113, "step": 3878, "task_loss": 2.9081761837005615 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8108384752519607, "compression/movement_sparsity/importance_threshold": -0.0013248371574047756, "compression/movement_sparsity/linear_layer_sparsity": 0.7994524565311624, "compression/movement_sparsity/model_sparsity": 0.7719888052458935, "compression_loss": 86.56272888183594, "distillation_loss": 4.393070697784424, "epoch": 3.28, "learning_rate": 3.73391565699258e-05, "loss": 90.5074, "step": 3879, "task_loss": 1.6161513328552246 }, { "compression/movement_sparsity/importance_regularization_factor": 0.81111665392023, "compression/movement_sparsity/importance_threshold": -0.0013228888677797503, "compression/movement_sparsity/linear_layer_sparsity": 0.7996014370816048, "compression/movement_sparsity/model_sparsity": 0.7721326678561106, "compression_loss": 86.5919189453125, "distillation_loss": 3.9516634941101074, "epoch": 3.28, "learning_rate": 3.7334460411383496e-05, "loss": 90.3049, "step": 3880, "task_loss": 2.8951797485351562 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8113945597305825, "compression/movement_sparsity/importance_threshold": -0.0013209424891792183, "compression/movement_sparsity/linear_layer_sparsity": 0.7998888929908026, "compression/movement_sparsity/model_sparsity": 0.7724102487705096, "compression_loss": 86.62108612060547, "distillation_loss": 2.451702117919922, "epoch": 3.28, "learning_rate": 3.7329764252841176e-05, "loss": 89.7498, "step": 3881, "task_loss": 0.7217500805854797 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8116721928169038, "compression/movement_sparsity/importance_threshold": -0.001318998020665479, "compression/movement_sparsity/linear_layer_sparsity": 0.8001335649865229, "compression/movement_sparsity/model_sparsity": 0.7726465155304777, "compression_loss": 86.65023040771484, "distillation_loss": 3.5232186317443848, "epoch": 3.28, "learning_rate": 3.732506809429886e-05, "loss": 90.262, "step": 3882, "task_loss": 1.8981250524520874 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8119495533130786, "compression/movement_sparsity/importance_threshold": -0.0013170554613008405, "compression/movement_sparsity/linear_layer_sparsity": 0.8004067715153957, "compression/movement_sparsity/model_sparsity": 0.7729103365746023, "compression_loss": 86.6793441772461, "distillation_loss": 4.564419746398926, "epoch": 3.28, "learning_rate": 3.7320371935756555e-05, "loss": 91.0749, "step": 3883, "task_loss": 3.2512624263763428 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8122266413529925, "compression/movement_sparsity/importance_threshold": -0.0013151148101476036, "compression/movement_sparsity/linear_layer_sparsity": 0.8005987267659979, "compression/movement_sparsity/model_sparsity": 0.7730956975718227, "compression_loss": 86.70841979980469, "distillation_loss": 4.30769157409668, "epoch": 3.28, "learning_rate": 3.731567577721424e-05, "loss": 90.0934, "step": 3884, "task_loss": 1.9805569648742676 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8125034570705301, "compression/movement_sparsity/importance_threshold": -0.0013131760662680762, "compression/movement_sparsity/linear_layer_sparsity": 0.8008830943157779, "compression/movement_sparsity/model_sparsity": 0.7733702962214509, "compression_loss": 86.73739624023438, "distillation_loss": 3.534207582473755, "epoch": 3.28, "learning_rate": 3.731097961867193e-05, "loss": 90.5623, "step": 3885, "task_loss": 1.7491908073425293 }, { "compression/movement_sparsity/importance_regularization_factor": 0.812780000599577, "compression/movement_sparsity/importance_threshold": -0.0013112392287245572, "compression/movement_sparsity/linear_layer_sparsity": 0.8010370830166275, "compression/movement_sparsity/model_sparsity": 0.7735189949367016, "compression_loss": 86.76636505126953, "distillation_loss": 3.468538761138916, "epoch": 3.28, "learning_rate": 3.7306283460129614e-05, "loss": 90.3915, "step": 3886, "task_loss": 2.10611891746521 }, { "compression/movement_sparsity/importance_regularization_factor": 0.813056272074018, "compression/movement_sparsity/importance_threshold": -0.0013093042965793555, "compression/movement_sparsity/linear_layer_sparsity": 0.8011666867946616, "compression/movement_sparsity/model_sparsity": 0.7736441464262526, "compression_loss": 86.79533386230469, "distillation_loss": 3.7590949535369873, "epoch": 3.29, "learning_rate": 3.730158730158731e-05, "loss": 90.5079, "step": 3887, "task_loss": 1.6393036842346191 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8133322716277386, "compression/movement_sparsity/importance_threshold": -0.001307371268894771, "compression/movement_sparsity/linear_layer_sparsity": 0.8014187756226514, "compression/movement_sparsity/model_sparsity": 0.7738875752274849, "compression_loss": 86.82425689697266, "distillation_loss": 3.8149752616882324, "epoch": 3.29, "learning_rate": 3.7296891143044994e-05, "loss": 90.3452, "step": 3888, "task_loss": 2.3185791969299316 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8136079993946235, "compression/movement_sparsity/importance_threshold": -0.0013054401447331106, "compression/movement_sparsity/linear_layer_sparsity": 0.8016001183640576, "compression/movement_sparsity/model_sparsity": 0.7740626882878483, "compression_loss": 86.8531265258789, "distillation_loss": 3.706239700317383, "epoch": 3.29, "learning_rate": 3.729219498450267e-05, "loss": 90.5185, "step": 3889, "task_loss": 2.384315252304077 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8138834555085583, "compression/movement_sparsity/importance_threshold": -0.001303510923156676, "compression/movement_sparsity/linear_layer_sparsity": 0.801704896025074, "compression/movement_sparsity/model_sparsity": 0.7741638665138749, "compression_loss": 86.8820571899414, "distillation_loss": 3.500173807144165, "epoch": 3.29, "learning_rate": 3.7287498825960366e-05, "loss": 90.714, "step": 3890, "task_loss": 2.0333919525146484 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8141586401034278, "compression/movement_sparsity/importance_threshold": -0.0013015836032277735, "compression/movement_sparsity/linear_layer_sparsity": 0.8020942081742173, "compression/movement_sparsity/model_sparsity": 0.7745398045930308, "compression_loss": 86.91093444824219, "distillation_loss": 4.866014003753662, "epoch": 3.29, "learning_rate": 3.728280266741805e-05, "loss": 90.7699, "step": 3891, "task_loss": 2.6550991535186768 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8144335533131172, "compression/movement_sparsity/importance_threshold": -0.0012996581840087063, "compression/movement_sparsity/linear_layer_sparsity": 0.8023458677321721, "compression/movement_sparsity/model_sparsity": 0.7747828188709747, "compression_loss": 86.93978118896484, "distillation_loss": 3.7373569011688232, "epoch": 3.29, "learning_rate": 3.7278106508875746e-05, "loss": 90.8806, "step": 3892, "task_loss": 2.115870714187622 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8147081952715118, "compression/movement_sparsity/importance_threshold": -0.0012977346645617772, "compression/movement_sparsity/linear_layer_sparsity": 0.8025454663742289, "compression/movement_sparsity/model_sparsity": 0.7749755606856394, "compression_loss": 86.96858215332031, "distillation_loss": 4.105489730834961, "epoch": 3.29, "learning_rate": 3.7273410350333425e-05, "loss": 90.4358, "step": 3893, "task_loss": 2.2087836265563965 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8149825661124966, "compression/movement_sparsity/importance_threshold": -0.001295813043949291, "compression/movement_sparsity/linear_layer_sparsity": 0.8027230172303268, "compression/movement_sparsity/model_sparsity": 0.7751470121236202, "compression_loss": 86.99742126464844, "distillation_loss": 5.379485130310059, "epoch": 3.29, "learning_rate": 3.726871419179112e-05, "loss": 90.8174, "step": 3894, "task_loss": 2.113665819168091 }, { "compression/movement_sparsity/importance_regularization_factor": 0.815256665969957, "compression/movement_sparsity/importance_threshold": -0.001293893321233551, "compression/movement_sparsity/linear_layer_sparsity": 0.8029855516291656, "compression/movement_sparsity/model_sparsity": 0.7754005276582085, "compression_loss": 87.02616882324219, "distillation_loss": 2.650883197784424, "epoch": 3.29, "learning_rate": 3.7264018033248805e-05, "loss": 90.7836, "step": 3895, "task_loss": 0.9732071161270142 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8155304949777777, "compression/movement_sparsity/importance_threshold": -0.0012919754954768634, "compression/movement_sparsity/linear_layer_sparsity": 0.8030973049282489, "compression/movement_sparsity/model_sparsity": 0.775508441887675, "compression_loss": 87.05491638183594, "distillation_loss": 2.632117986679077, "epoch": 3.29, "learning_rate": 3.725932187470649e-05, "loss": 90.4978, "step": 3896, "task_loss": 1.5082505941390991 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8158040532698443, "compression/movement_sparsity/importance_threshold": -0.0012900595657415288, "compression/movement_sparsity/linear_layer_sparsity": 0.8033497514812677, "compression/movement_sparsity/model_sparsity": 0.7757522161249812, "compression_loss": 87.0836410522461, "distillation_loss": 3.9015860557556152, "epoch": 3.29, "learning_rate": 3.7254625716164184e-05, "loss": 90.3562, "step": 3897, "task_loss": 3.559332847595215 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8160773409800414, "compression/movement_sparsity/importance_threshold": -0.0012881455310898553, "compression/movement_sparsity/linear_layer_sparsity": 0.8036510751974261, "compression/movement_sparsity/model_sparsity": 0.7760431884445093, "compression_loss": 87.11227416992188, "distillation_loss": 4.59242582321167, "epoch": 3.29, "learning_rate": 3.7249929557621864e-05, "loss": 91.299, "step": 3898, "task_loss": 1.2761034965515137 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8163503582422547, "compression/movement_sparsity/importance_threshold": -0.0012862333905841426, "compression/movement_sparsity/linear_layer_sparsity": 0.8038668907074675, "compression/movement_sparsity/model_sparsity": 0.7762515900278547, "compression_loss": 87.14088439941406, "distillation_loss": 3.761213779449463, "epoch": 3.3, "learning_rate": 3.724523339907956e-05, "loss": 90.4576, "step": 3899, "task_loss": 1.852061152458191 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8166231051903688, "compression/movement_sparsity/importance_threshold": -0.001284323143286698, "compression/movement_sparsity/linear_layer_sparsity": 0.8041046347617914, "compression/movement_sparsity/model_sparsity": 0.7764811668425262, "compression_loss": 87.1695327758789, "distillation_loss": 3.510695457458496, "epoch": 3.3, "learning_rate": 3.724053724053724e-05, "loss": 90.9901, "step": 3900, "task_loss": 3.1880505084991455 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8168955819582693, "compression/movement_sparsity/importance_threshold": -0.0012824147882598246, "compression/movement_sparsity/linear_layer_sparsity": 0.8040968482803251, "compression/movement_sparsity/model_sparsity": 0.7764736478506524, "compression_loss": 87.19807434082031, "distillation_loss": 3.4244914054870605, "epoch": 3.3, "learning_rate": 3.723584108199493e-05, "loss": 90.5658, "step": 3901, "task_loss": 2.0118608474731445 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8171677886798413, "compression/movement_sparsity/importance_threshold": -0.0012805083245658235, "compression/movement_sparsity/linear_layer_sparsity": 0.8043192459309016, "compression/movement_sparsity/model_sparsity": 0.7766884054577564, "compression_loss": 87.2265853881836, "distillation_loss": 4.648602485656738, "epoch": 3.3, "learning_rate": 3.7231144923452616e-05, "loss": 91.4149, "step": 3902, "task_loss": 3.4610774517059326 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8174397254889696, "compression/movement_sparsity/importance_threshold": -0.0012786037512670032, "compression/movement_sparsity/linear_layer_sparsity": 0.8045043567092808, "compression/movement_sparsity/model_sparsity": 0.7768671571114308, "compression_loss": 87.25508117675781, "distillation_loss": 3.6228928565979004, "epoch": 3.3, "learning_rate": 3.72264487649103e-05, "loss": 91.1295, "step": 3903, "task_loss": 1.9063886404037476 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8177113925195397, "compression/movement_sparsity/importance_threshold": -0.0012767010674256648, "compression/movement_sparsity/linear_layer_sparsity": 0.8047339923296122, "compression/movement_sparsity/model_sparsity": 0.7770889040417621, "compression_loss": 87.28352355957031, "distillation_loss": 3.057570457458496, "epoch": 3.3, "learning_rate": 3.7221752606367995e-05, "loss": 90.6942, "step": 3904, "task_loss": 2.9712564945220947 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8179827899054365, "compression/movement_sparsity/importance_threshold": -0.001274800272104113, "compression/movement_sparsity/linear_layer_sparsity": 0.8049187692312976, "compression/movement_sparsity/model_sparsity": 0.7772673332884343, "compression_loss": 87.31199645996094, "distillation_loss": 3.0461325645446777, "epoch": 3.3, "learning_rate": 3.721705644782568e-05, "loss": 90.8988, "step": 3905, "task_loss": 1.039448618888855 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8182539177805455, "compression/movement_sparsity/importance_threshold": -0.0012729013643646503, "compression/movement_sparsity/linear_layer_sparsity": 0.8051465685298131, "compression/movement_sparsity/model_sparsity": 0.7774873069802533, "compression_loss": 87.34043884277344, "distillation_loss": 4.027436256408691, "epoch": 3.3, "learning_rate": 3.721236028928337e-05, "loss": 91.2984, "step": 3906, "task_loss": 1.8307119607925415 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8185247762787512, "compression/movement_sparsity/importance_threshold": -0.0012710043432695847, "compression/movement_sparsity/linear_layer_sparsity": 0.8053643873000174, "compression/movement_sparsity/model_sparsity": 0.7776976430056121, "compression_loss": 87.36880493164062, "distillation_loss": 3.299128770828247, "epoch": 3.3, "learning_rate": 3.7207664130741054e-05, "loss": 91.0946, "step": 3907, "task_loss": 1.9460349082946777 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8187953655339393, "compression/movement_sparsity/importance_threshold": -0.0012691092078812175, "compression/movement_sparsity/linear_layer_sparsity": 0.8057155898093965, "compression/movement_sparsity/model_sparsity": 0.7780367806283687, "compression_loss": 87.39716339111328, "distillation_loss": 3.5561137199401855, "epoch": 3.3, "learning_rate": 3.720296797219874e-05, "loss": 91.1712, "step": 3908, "task_loss": 1.7611490488052368 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8190656856799947, "compression/movement_sparsity/importance_threshold": -0.0012672159572618517, "compression/movement_sparsity/linear_layer_sparsity": 0.8059746304271179, "compression/movement_sparsity/model_sparsity": 0.7782869224039694, "compression_loss": 87.42545318603516, "distillation_loss": 3.0332374572753906, "epoch": 3.3, "learning_rate": 3.7198271813656434e-05, "loss": 91.0947, "step": 3909, "task_loss": 1.8922971487045288 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8193357368508025, "compression/movement_sparsity/importance_threshold": -0.001265324590473794, "compression/movement_sparsity/linear_layer_sparsity": 0.8061248153184917, "compression/movement_sparsity/model_sparsity": 0.7784319479823015, "compression_loss": 87.45381927490234, "distillation_loss": 4.923306941986084, "epoch": 3.3, "learning_rate": 3.7193575655114113e-05, "loss": 91.757, "step": 3910, "task_loss": 2.933204174041748 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8196055191802482, "compression/movement_sparsity/importance_threshold": -0.0012634351065793446, "compression/movement_sparsity/linear_layer_sparsity": 0.8063399630751455, "compression/movement_sparsity/model_sparsity": 0.7786397047516425, "compression_loss": 87.48209381103516, "distillation_loss": 4.682652473449707, "epoch": 3.31, "learning_rate": 3.7188879496571807e-05, "loss": 91.762, "step": 3911, "task_loss": 1.5890557765960693 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8198750328022164, "compression/movement_sparsity/importance_threshold": -0.0012615475046408118, "compression/movement_sparsity/linear_layer_sparsity": 0.8066580163984969, "compression/movement_sparsity/model_sparsity": 0.7789468319648902, "compression_loss": 87.51036071777344, "distillation_loss": 2.591740608215332, "epoch": 3.31, "learning_rate": 3.718418333802949e-05, "loss": 90.7678, "step": 3912, "task_loss": 1.3380464315414429 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8201442778505927, "compression/movement_sparsity/importance_threshold": -0.001259661783720496, "compression/movement_sparsity/linear_layer_sparsity": 0.8068641971810886, "compression/movement_sparsity/model_sparsity": 0.7791459298033137, "compression_loss": 87.5385513305664, "distillation_loss": 3.4777636528015137, "epoch": 3.31, "learning_rate": 3.717948717948718e-05, "loss": 90.8966, "step": 3913, "task_loss": 1.8086028099060059 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8204132544592619, "compression/movement_sparsity/importance_threshold": -0.001257777942880704, "compression/movement_sparsity/linear_layer_sparsity": 0.8071437235188085, "compression/movement_sparsity/model_sparsity": 0.7794158535514093, "compression_loss": 87.56670379638672, "distillation_loss": 1.9852522611618042, "epoch": 3.31, "learning_rate": 3.717479102094487e-05, "loss": 91.1431, "step": 3914, "task_loss": 1.2358969449996948 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8206819627621096, "compression/movement_sparsity/importance_threshold": -0.0012558959811837366, "compression/movement_sparsity/linear_layer_sparsity": 0.8073720355565324, "compression/movement_sparsity/model_sparsity": 0.7796363223682673, "compression_loss": 87.59481811523438, "distillation_loss": 4.498531818389893, "epoch": 3.31, "learning_rate": 3.717009486240255e-05, "loss": 91.622, "step": 3915, "task_loss": 2.271231174468994 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8209504028930203, "compression/movement_sparsity/importance_threshold": -0.0012540158976919015, "compression/movement_sparsity/linear_layer_sparsity": 0.8076166360072469, "compression/movement_sparsity/model_sparsity": 0.7798725200410208, "compression_loss": 87.62295532226562, "distillation_loss": 4.416722297668457, "epoch": 3.31, "learning_rate": 3.7165398703860245e-05, "loss": 91.6264, "step": 3916, "task_loss": 3.185558795928955 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8212185749858798, "compression/movement_sparsity/importance_threshold": -0.001252137691467499, "compression/movement_sparsity/linear_layer_sparsity": 0.8078009478663943, "compression/movement_sparsity/model_sparsity": 0.780050500220797, "compression_loss": 87.65107727050781, "distillation_loss": 3.485424518585205, "epoch": 3.31, "learning_rate": 3.716070254531793e-05, "loss": 91.2902, "step": 3917, "task_loss": 1.4358606338500977 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8214864791745726, "compression/movement_sparsity/importance_threshold": -0.0012502613615728367, "compression/movement_sparsity/linear_layer_sparsity": 0.8079720238994661, "compression/movement_sparsity/model_sparsity": 0.7802156992658412, "compression_loss": 87.67911529541016, "distillation_loss": 3.9715631008148193, "epoch": 3.31, "learning_rate": 3.7156006386775624e-05, "loss": 90.8783, "step": 3918, "task_loss": 1.6224737167358398 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8217541155929845, "compression/movement_sparsity/importance_threshold": -0.0012483869070702147, "compression/movement_sparsity/linear_layer_sparsity": 0.8083338866147116, "compression/movement_sparsity/model_sparsity": 0.7805651308835981, "compression_loss": 87.70706939697266, "distillation_loss": 5.6892499923706055, "epoch": 3.31, "learning_rate": 3.7151310228233304e-05, "loss": 92.1404, "step": 3919, "task_loss": 3.364763021469116 }, { "compression/movement_sparsity/importance_regularization_factor": 0.822021484375, "compression/movement_sparsity/importance_threshold": -0.0012465143270219414, "compression/movement_sparsity/linear_layer_sparsity": 0.8085758637485462, "compression/movement_sparsity/model_sparsity": 0.7807987953584768, "compression_loss": 87.73517608642578, "distillation_loss": 2.941068649291992, "epoch": 3.31, "learning_rate": 3.714661406969099e-05, "loss": 91.6932, "step": 3920, "task_loss": 1.8125264644622803 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8222885856545046, "compression/movement_sparsity/importance_threshold": -0.0012446436204903181, "compression/movement_sparsity/linear_layer_sparsity": 0.8088810866675213, "compression/movement_sparsity/model_sparsity": 0.7810935329312096, "compression_loss": 87.76311492919922, "distillation_loss": 2.9086735248565674, "epoch": 3.31, "learning_rate": 3.7141917911148683e-05, "loss": 91.5697, "step": 3921, "task_loss": 1.0722906589508057 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8225554195653835, "compression/movement_sparsity/importance_threshold": -0.0012427747865376478, "compression/movement_sparsity/linear_layer_sparsity": 0.8091712612869402, "compression/movement_sparsity/model_sparsity": 0.7813737391597698, "compression_loss": 87.79106903076172, "distillation_loss": 3.697805881500244, "epoch": 3.32, "learning_rate": 3.713722175260637e-05, "loss": 91.5629, "step": 3922, "task_loss": 1.5074098110198975 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8228219862415216, "compression/movement_sparsity/importance_threshold": -0.0012409078242262362, "compression/movement_sparsity/linear_layer_sparsity": 0.8093744729517905, "compression/movement_sparsity/model_sparsity": 0.7815699698787804, "compression_loss": 87.81896209716797, "distillation_loss": 5.6035661697387695, "epoch": 3.32, "learning_rate": 3.7132525594064056e-05, "loss": 93.1085, "step": 3923, "task_loss": 2.62610125541687 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8230882858168044, "compression/movement_sparsity/importance_threshold": -0.0012390427326183862, "compression/movement_sparsity/linear_layer_sparsity": 0.8095981941849746, "compression/movement_sparsity/model_sparsity": 0.7817860055993575, "compression_loss": 87.84691619873047, "distillation_loss": 2.762773275375366, "epoch": 3.32, "learning_rate": 3.712782943552174e-05, "loss": 92.1615, "step": 3924, "task_loss": 2.843660831451416 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8233543184251164, "compression/movement_sparsity/importance_threshold": -0.0012371795107764044, "compression/movement_sparsity/linear_layer_sparsity": 0.8098053765976477, "compression/movement_sparsity/model_sparsity": 0.7819860706587877, "compression_loss": 87.87483215332031, "distillation_loss": 4.545276641845703, "epoch": 3.32, "learning_rate": 3.7123133276979436e-05, "loss": 92.016, "step": 3925, "task_loss": 2.5866806507110596 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8236200842003436, "compression/movement_sparsity/importance_threshold": -0.0012353181577625903, "compression/movement_sparsity/linear_layer_sparsity": 0.8099839052354918, "compression/movement_sparsity/model_sparsity": 0.7821584662887036, "compression_loss": 87.90270233154297, "distillation_loss": 4.158599376678467, "epoch": 3.32, "learning_rate": 3.711843711843712e-05, "loss": 92.2186, "step": 3926, "task_loss": 2.08990740776062 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8238855832763703, "compression/movement_sparsity/importance_threshold": -0.001233458672639253, "compression/movement_sparsity/linear_layer_sparsity": 0.8101314191133151, "compression/movement_sparsity/model_sparsity": 0.7823009126110179, "compression_loss": 87.93062591552734, "distillation_loss": 2.797508955001831, "epoch": 3.32, "learning_rate": 3.711374095989481e-05, "loss": 91.4538, "step": 3927, "task_loss": 1.3206008672714233 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8241508157870824, "compression/movement_sparsity/importance_threshold": -0.0012316010544686914, "compression/movement_sparsity/linear_layer_sparsity": 0.8103962310281724, "compression/movement_sparsity/model_sparsity": 0.782556627421943, "compression_loss": 87.95844268798828, "distillation_loss": 4.378626823425293, "epoch": 3.32, "learning_rate": 3.7109044801352495e-05, "loss": 91.7812, "step": 3928, "task_loss": 3.042166233062744 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8244157818663643, "compression/movement_sparsity/importance_threshold": -0.0012297453023132135, "compression/movement_sparsity/linear_layer_sparsity": 0.8106984848294062, "compression/movement_sparsity/model_sparsity": 0.782848497875263, "compression_loss": 87.98628997802734, "distillation_loss": 5.075044631958008, "epoch": 3.32, "learning_rate": 3.710434864281018e-05, "loss": 92.4558, "step": 3929, "task_loss": 2.5954647064208984 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8246804816481018, "compression/movement_sparsity/importance_threshold": -0.0012278914152351206, "compression/movement_sparsity/linear_layer_sparsity": 0.8108832498069238, "compression/movement_sparsity/model_sparsity": 0.7830269156073995, "compression_loss": 88.01397705078125, "distillation_loss": 2.5974600315093994, "epoch": 3.32, "learning_rate": 3.7099652484267874e-05, "loss": 91.7768, "step": 3930, "task_loss": 1.1997915506362915 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8249449152661795, "compression/movement_sparsity/importance_threshold": -0.0012260393922967194, "compression/movement_sparsity/linear_layer_sparsity": 0.8111075076276516, "compression/movement_sparsity/model_sparsity": 0.7832434694820873, "compression_loss": 88.04173278808594, "distillation_loss": 3.8856089115142822, "epoch": 3.32, "learning_rate": 3.709495632572556e-05, "loss": 92.229, "step": 3931, "task_loss": 2.5388429164886475 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8252090828544829, "compression/movement_sparsity/importance_threshold": -0.0012241892325603118, "compression/movement_sparsity/linear_layer_sparsity": 0.8112844503512001, "compression/movement_sparsity/model_sparsity": 0.7834143336787426, "compression_loss": 88.06937408447266, "distillation_loss": 5.522714614868164, "epoch": 3.32, "learning_rate": 3.709026016718325e-05, "loss": 93.1526, "step": 3932, "task_loss": 1.9521583318710327 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8254729845468971, "compression/movement_sparsity/importance_threshold": -0.001222340935088201, "compression/movement_sparsity/linear_layer_sparsity": 0.8114398580279983, "compression/movement_sparsity/model_sparsity": 0.7835644026237528, "compression_loss": 88.0970458984375, "distillation_loss": 3.8998093605041504, "epoch": 3.32, "learning_rate": 3.708556400864093e-05, "loss": 92.1745, "step": 3933, "task_loss": 1.6439554691314697 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8257366204773071, "compression/movement_sparsity/importance_threshold": -0.0012204944989426935, "compression/movement_sparsity/linear_layer_sparsity": 0.8115950629939467, "compression/movement_sparsity/model_sparsity": 0.7837142758216545, "compression_loss": 88.1246566772461, "distillation_loss": 4.4539642333984375, "epoch": 3.33, "learning_rate": 3.708086785009862e-05, "loss": 92.6833, "step": 3934, "task_loss": 2.3233323097229004 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8259999907795982, "compression/movement_sparsity/importance_threshold": -0.0012186499231860914, "compression/movement_sparsity/linear_layer_sparsity": 0.8117892242155614, "compression/movement_sparsity/model_sparsity": 0.7839017670079971, "compression_loss": 88.15223693847656, "distillation_loss": 4.067781925201416, "epoch": 3.33, "learning_rate": 3.707617169155631e-05, "loss": 92.1285, "step": 3935, "task_loss": 2.8161635398864746 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8262630955876553, "compression/movement_sparsity/importance_threshold": -0.0012168072068806995, "compression/movement_sparsity/linear_layer_sparsity": 0.812018859835893, "compression/movement_sparsity/model_sparsity": 0.7841235139383284, "compression_loss": 88.17977905273438, "distillation_loss": 2.5938148498535156, "epoch": 3.33, "learning_rate": 3.707147553301399e-05, "loss": 91.8138, "step": 3936, "task_loss": 1.9563353061676025 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8265259350353639, "compression/movement_sparsity/importance_threshold": -0.0012149663490888208, "compression/movement_sparsity/linear_layer_sparsity": 0.8122572954919396, "compression/movement_sparsity/model_sparsity": 0.7843537585960759, "compression_loss": 88.20732116699219, "distillation_loss": 2.8357396125793457, "epoch": 3.33, "learning_rate": 3.7066779374471685e-05, "loss": 92.0158, "step": 3937, "task_loss": 2.137120008468628 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8267885092566086, "compression/movement_sparsity/importance_threshold": -0.0012131273488727619, "compression/movement_sparsity/linear_layer_sparsity": 0.8124652052788386, "compression/movement_sparsity/model_sparsity": 0.7845545260421896, "compression_loss": 88.23487091064453, "distillation_loss": 3.7070930004119873, "epoch": 3.33, "learning_rate": 3.706208321592937e-05, "loss": 91.6346, "step": 3938, "task_loss": 1.8734791278839111 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8270508183852753, "compression/movement_sparsity/importance_threshold": -0.001211290205294823, "compression/movement_sparsity/linear_layer_sparsity": 0.8126002345531471, "compression/movement_sparsity/model_sparsity": 0.784684916645527, "compression_loss": 88.2623291015625, "distillation_loss": 3.246004581451416, "epoch": 3.33, "learning_rate": 3.705738705738706e-05, "loss": 91.6848, "step": 3939, "task_loss": 1.7143982648849487 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8273128625552484, "compression/movement_sparsity/importance_threshold": -0.0012094549174173118, "compression/movement_sparsity/linear_layer_sparsity": 0.8129158672704684, "compression/movement_sparsity/model_sparsity": 0.7849897064080085, "compression_loss": 88.28975677490234, "distillation_loss": 3.7962069511413574, "epoch": 3.33, "learning_rate": 3.7052690898844744e-05, "loss": 92.3236, "step": 3940, "task_loss": 2.2654871940612793 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8275746419004133, "compression/movement_sparsity/importance_threshold": -0.0012076214843025302, "compression/movement_sparsity/linear_layer_sparsity": 0.8130339046058958, "compression/movement_sparsity/model_sparsity": 0.7851036887978385, "compression_loss": 88.31714630126953, "distillation_loss": 4.336456298828125, "epoch": 3.33, "learning_rate": 3.704799474030243e-05, "loss": 92.2694, "step": 3941, "task_loss": 2.346320152282715 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8278361565546555, "compression/movement_sparsity/importance_threshold": -0.0012057899050127814, "compression/movement_sparsity/linear_layer_sparsity": 0.8132509125327009, "compression/movement_sparsity/model_sparsity": 0.7853132418347635, "compression_loss": 88.34456634521484, "distillation_loss": 4.086164474487305, "epoch": 3.33, "learning_rate": 3.7043298581760124e-05, "loss": 91.9828, "step": 3942, "task_loss": 2.5183610916137695 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8280974066518596, "compression/movement_sparsity/importance_threshold": -0.0012039601786103709, "compression/movement_sparsity/linear_layer_sparsity": 0.813489300492077, "compression/movement_sparsity/model_sparsity": 0.7855434404343679, "compression_loss": 88.37195587158203, "distillation_loss": 4.417043685913086, "epoch": 3.33, "learning_rate": 3.703860242321781e-05, "loss": 92.134, "step": 3943, "task_loss": 2.0718770027160645 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8283583923259111, "compression/movement_sparsity/importance_threshold": -0.0012021323041576017, "compression/movement_sparsity/linear_layer_sparsity": 0.81380179715331, "compression/movement_sparsity/model_sparsity": 0.7858452018739355, "compression_loss": 88.3992919921875, "distillation_loss": 3.059758424758911, "epoch": 3.33, "learning_rate": 3.7033906264675496e-05, "loss": 91.2914, "step": 3944, "task_loss": 2.761995315551758 }, { "compression/movement_sparsity/importance_regularization_factor": 0.828619113710695, "compression/movement_sparsity/importance_threshold": -0.0012003062807167796, "compression/movement_sparsity/linear_layer_sparsity": 0.8141207924859046, "compression/movement_sparsity/model_sparsity": 0.786153238735511, "compression_loss": 88.42668914794922, "distillation_loss": 4.577442646026611, "epoch": 3.33, "learning_rate": 3.702921010613318e-05, "loss": 93.0885, "step": 3945, "task_loss": 2.4807395935058594 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8288795709400965, "compression/movement_sparsity/importance_threshold": -0.0011984821073502058, "compression/movement_sparsity/linear_layer_sparsity": 0.814422867424624, "compression/movement_sparsity/model_sparsity": 0.786444936470794, "compression_loss": 88.4539566040039, "distillation_loss": 3.5703845024108887, "epoch": 3.34, "learning_rate": 3.702451394759087e-05, "loss": 92.1028, "step": 3946, "task_loss": 1.8918733596801758 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8291397641480005, "compression/movement_sparsity/importance_threshold": -0.0011966597831201868, "compression/movement_sparsity/linear_layer_sparsity": 0.8147567918150987, "compression/movement_sparsity/model_sparsity": 0.7867673895311844, "compression_loss": 88.48118591308594, "distillation_loss": 3.9878056049346924, "epoch": 3.34, "learning_rate": 3.701981778904856e-05, "loss": 92.5515, "step": 3947, "task_loss": 1.6891635656356812 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8293996934682926, "compression/movement_sparsity/importance_threshold": -0.0011948393070890248, "compression/movement_sparsity/linear_layer_sparsity": 0.8148781679174641, "compression/movement_sparsity/model_sparsity": 0.786884595991037, "compression_loss": 88.50838470458984, "distillation_loss": 2.475827693939209, "epoch": 3.34, "learning_rate": 3.701512163050625e-05, "loss": 92.3976, "step": 3948, "task_loss": 1.2996852397918701 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8296593590348574, "compression/movement_sparsity/importance_threshold": -0.0011930206783190254, "compression/movement_sparsity/linear_layer_sparsity": 0.8150161424611787, "compression/movement_sparsity/model_sparsity": 0.7870178306847156, "compression_loss": 88.5355453491211, "distillation_loss": 4.750632286071777, "epoch": 3.34, "learning_rate": 3.7010425471963935e-05, "loss": 92.9652, "step": 3949, "task_loss": 2.2748804092407227 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8299187609815805, "compression/movement_sparsity/importance_threshold": -0.0011912038958724917, "compression/movement_sparsity/linear_layer_sparsity": 0.8151420258989105, "compression/movement_sparsity/model_sparsity": 0.7871393896390987, "compression_loss": 88.56269836425781, "distillation_loss": 4.765791893005371, "epoch": 3.34, "learning_rate": 3.700572931342162e-05, "loss": 92.5154, "step": 3950, "task_loss": 1.50233793258667 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8301778994423469, "compression/movement_sparsity/importance_threshold": -0.0011893889588117275, "compression/movement_sparsity/linear_layer_sparsity": 0.8153292591791289, "compression/movement_sparsity/model_sparsity": 0.7873201908801445, "compression_loss": 88.58982849121094, "distillation_loss": 3.5545244216918945, "epoch": 3.34, "learning_rate": 3.7001033154879314e-05, "loss": 91.8625, "step": 3951, "task_loss": 1.6280970573425293 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8304367745510416, "compression/movement_sparsity/importance_threshold": -0.0011875758661990367, "compression/movement_sparsity/linear_layer_sparsity": 0.8155258648551089, "compression/movement_sparsity/model_sparsity": 0.787510042546325, "compression_loss": 88.6168441772461, "distillation_loss": 5.532506465911865, "epoch": 3.34, "learning_rate": 3.6996336996337e-05, "loss": 92.99, "step": 3952, "task_loss": 2.5115251541137695 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8306953864415499, "compression/movement_sparsity/importance_threshold": -0.0011857646170967233, "compression/movement_sparsity/linear_layer_sparsity": 0.8157396055599818, "compression/movement_sparsity/model_sparsity": 0.7877164406004421, "compression_loss": 88.64398956298828, "distillation_loss": 4.099217414855957, "epoch": 3.34, "learning_rate": 3.699164083779468e-05, "loss": 92.4702, "step": 3953, "task_loss": 1.847812533378601 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8309537352477568, "compression/movement_sparsity/importance_threshold": -0.001183955210567092, "compression/movement_sparsity/linear_layer_sparsity": 0.8158791183213214, "compression/movement_sparsity/model_sparsity": 0.7878511606692382, "compression_loss": 88.67095947265625, "distillation_loss": 3.4225873947143555, "epoch": 3.34, "learning_rate": 3.698694467925237e-05, "loss": 92.3054, "step": 3954, "task_loss": 0.9704834818840027 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8312118211035477, "compression/movement_sparsity/importance_threshold": -0.001182147645672445, "compression/movement_sparsity/linear_layer_sparsity": 0.816088578250013, "compression/movement_sparsity/model_sparsity": 0.7880534250050051, "compression_loss": 88.69805908203125, "distillation_loss": 3.9661622047424316, "epoch": 3.34, "learning_rate": 3.698224852071006e-05, "loss": 92.423, "step": 3955, "task_loss": 1.703989863395691 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8314696441428073, "compression/movement_sparsity/importance_threshold": -0.0011803419214750904, "compression/movement_sparsity/linear_layer_sparsity": 0.8162609301690218, "compression/movement_sparsity/model_sparsity": 0.7882198561053794, "compression_loss": 88.72504425048828, "distillation_loss": 3.625500202178955, "epoch": 3.34, "learning_rate": 3.697755236216775e-05, "loss": 92.7499, "step": 3956, "task_loss": 2.0203709602355957 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8317272044994214, "compression/movement_sparsity/importance_threshold": -0.0011785380370373261, "compression/movement_sparsity/linear_layer_sparsity": 0.8164100418853082, "compression/movement_sparsity/model_sparsity": 0.7883638453754901, "compression_loss": 88.75201416015625, "distillation_loss": 3.6596343517303467, "epoch": 3.34, "learning_rate": 3.697285620362544e-05, "loss": 92.4173, "step": 3957, "task_loss": 1.8166301250457764 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8319845023072744, "compression/movement_sparsity/importance_threshold": -0.0011767359914214614, "compression/movement_sparsity/linear_layer_sparsity": 0.8166303170340455, "compression/movement_sparsity/model_sparsity": 0.7885765533952227, "compression_loss": 88.77897644042969, "distillation_loss": 3.64263916015625, "epoch": 3.35, "learning_rate": 3.6968160045083125e-05, "loss": 93.0686, "step": 3958, "task_loss": 1.6418927907943726 }, { "compression/movement_sparsity/importance_regularization_factor": 0.832241537700252, "compression/movement_sparsity/importance_threshold": -0.0011749357836897964, "compression/movement_sparsity/linear_layer_sparsity": 0.8167915914013205, "compression/movement_sparsity/model_sparsity": 0.7887322874918439, "compression_loss": 88.80581665039062, "distillation_loss": 5.406153678894043, "epoch": 3.35, "learning_rate": 3.696346388654081e-05, "loss": 93.4399, "step": 3959, "task_loss": 2.6660962104797363 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8324983108122389, "compression/movement_sparsity/importance_threshold": -0.0011731374129046396, "compression/movement_sparsity/linear_layer_sparsity": 0.8171294388429474, "compression/movement_sparsity/model_sparsity": 0.7890585288345106, "compression_loss": 88.83274841308594, "distillation_loss": 4.4663472175598145, "epoch": 3.35, "learning_rate": 3.69587677279985e-05, "loss": 92.892, "step": 3960, "task_loss": 2.2665445804595947 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8327548217771207, "compression/movement_sparsity/importance_threshold": -0.0011713408781282896, "compression/movement_sparsity/linear_layer_sparsity": 0.8173594918091461, "compression/movement_sparsity/model_sparsity": 0.7892806787735946, "compression_loss": 88.8595962524414, "distillation_loss": 4.842007160186768, "epoch": 3.35, "learning_rate": 3.695407156945619e-05, "loss": 92.7, "step": 3961, "task_loss": 2.6616220474243164 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8330110707287821, "compression/movement_sparsity/importance_threshold": -0.0011695461784230555, "compression/movement_sparsity/linear_layer_sparsity": 0.8175645040233096, "compression/movement_sparsity/model_sparsity": 0.7894786481875101, "compression_loss": 88.88639068603516, "distillation_loss": 4.151176452636719, "epoch": 3.35, "learning_rate": 3.694937541091387e-05, "loss": 92.9752, "step": 3962, "task_loss": 3.4933841228485107 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8332670578011085, "compression/movement_sparsity/importance_threshold": -0.0011677533128512376, "compression/movement_sparsity/linear_layer_sparsity": 0.8177687173182413, "compression/movement_sparsity/model_sparsity": 0.7896758461275275, "compression_loss": 88.91317749023438, "distillation_loss": 3.627025604248047, "epoch": 3.35, "learning_rate": 3.6944679252371564e-05, "loss": 92.526, "step": 3963, "task_loss": 2.206144094467163 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8335227831279851, "compression/movement_sparsity/importance_threshold": -0.001165962280475141, "compression/movement_sparsity/linear_layer_sparsity": 0.8180042554015525, "compression/movement_sparsity/model_sparsity": 0.789903292753077, "compression_loss": 88.93992614746094, "distillation_loss": 4.077983379364014, "epoch": 3.35, "learning_rate": 3.693998309382925e-05, "loss": 93.9092, "step": 3964, "task_loss": 3.2774624824523926 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8337782468432968, "compression/movement_sparsity/importance_threshold": -0.00116417308035707, "compression/movement_sparsity/linear_layer_sparsity": 0.8182186757839807, "compression/movement_sparsity/model_sparsity": 0.7901103471357345, "compression_loss": 88.96666717529297, "distillation_loss": 5.144759178161621, "epoch": 3.35, "learning_rate": 3.6935286935286937e-05, "loss": 93.5206, "step": 3965, "task_loss": 3.137341260910034 }, { "compression/movement_sparsity/importance_regularization_factor": 0.834033449080929, "compression/movement_sparsity/importance_threshold": -0.0011623857115593272, "compression/movement_sparsity/linear_layer_sparsity": 0.8184172847201235, "compression/movement_sparsity/model_sparsity": 0.7903021332439283, "compression_loss": 88.99337768554688, "distillation_loss": 3.665590763092041, "epoch": 3.35, "learning_rate": 3.693059077674462e-05, "loss": 92.6423, "step": 3966, "task_loss": 2.5177559852600098 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8342883899747666, "compression/movement_sparsity/importance_threshold": -0.0011606001731442198, "compression/movement_sparsity/linear_layer_sparsity": 0.8185267366548532, "compression/movement_sparsity/model_sparsity": 0.7904078251679865, "compression_loss": 89.02003479003906, "distillation_loss": 3.1894383430480957, "epoch": 3.35, "learning_rate": 3.692589461820231e-05, "loss": 92.2815, "step": 3967, "task_loss": 1.9407908916473389 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8345430696586951, "compression/movement_sparsity/importance_threshold": -0.0011588164641740473, "compression/movement_sparsity/linear_layer_sparsity": 0.8187003525356313, "compression/movement_sparsity/model_sparsity": 0.790575476809155, "compression_loss": 89.04669189453125, "distillation_loss": 6.0429487228393555, "epoch": 3.35, "learning_rate": 3.692119845966e-05, "loss": 93.5953, "step": 3968, "task_loss": 3.563713788986206 }, { "compression/movement_sparsity/importance_regularization_factor": 0.834797488266599, "compression/movement_sparsity/importance_threshold": -0.0011570345837111189, "compression/movement_sparsity/linear_layer_sparsity": 0.8189081192325186, "compression/movement_sparsity/model_sparsity": 0.790776106080839, "compression_loss": 89.07333374023438, "distillation_loss": 5.140173435211182, "epoch": 3.35, "learning_rate": 3.691650230111769e-05, "loss": 93.3749, "step": 3969, "task_loss": 3.711853265762329 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8350516459323641, "compression/movement_sparsity/importance_threshold": -0.0011552545308177332, "compression/movement_sparsity/linear_layer_sparsity": 0.8190605697157431, "compression/movement_sparsity/model_sparsity": 0.7909233194209723, "compression_loss": 89.09992218017578, "distillation_loss": 4.1165266036987305, "epoch": 3.36, "learning_rate": 3.6911806142575375e-05, "loss": 93.4851, "step": 3970, "task_loss": 1.076065182685852 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8353055427898752, "compression/movement_sparsity/importance_threshold": -0.0011534763045561985, "compression/movement_sparsity/linear_layer_sparsity": 0.819471083034943, "compression/movement_sparsity/model_sparsity": 0.7913197303447709, "compression_loss": 89.12654113769531, "distillation_loss": 4.678439617156982, "epoch": 3.36, "learning_rate": 3.690710998403306e-05, "loss": 93.1489, "step": 3971, "task_loss": 2.504371166229248 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8355591789730175, "compression/movement_sparsity/importance_threshold": -0.001151699903988817, "compression/movement_sparsity/linear_layer_sparsity": 0.8196287086069214, "compression/movement_sparsity/model_sparsity": 0.7914719409934389, "compression_loss": 89.15310668945312, "distillation_loss": 4.914548873901367, "epoch": 3.36, "learning_rate": 3.690241382549075e-05, "loss": 93.4603, "step": 3972, "task_loss": 2.8925068378448486 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8358125546156762, "compression/movement_sparsity/importance_threshold": -0.0011499253281778915, "compression/movement_sparsity/linear_layer_sparsity": 0.8198363679863, "compression/movement_sparsity/model_sparsity": 0.7916724666343008, "compression_loss": 89.17958068847656, "distillation_loss": 6.253751754760742, "epoch": 3.36, "learning_rate": 3.689771766694844e-05, "loss": 93.8565, "step": 3973, "task_loss": 3.179927110671997 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8360656698517362, "compression/movement_sparsity/importance_threshold": -0.0011481525761857288, "compression/movement_sparsity/linear_layer_sparsity": 0.8200846172323111, "compression/movement_sparsity/model_sparsity": 0.7919121877550074, "compression_loss": 89.20613098144531, "distillation_loss": 5.055649757385254, "epoch": 3.36, "learning_rate": 3.689302150840613e-05, "loss": 93.4686, "step": 3974, "task_loss": 2.6819663047790527 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8363185248150832, "compression/movement_sparsity/importance_threshold": -0.0011463816470746293, "compression/movement_sparsity/linear_layer_sparsity": 0.8201867596522798, "compression/movement_sparsity/model_sparsity": 0.7920108212686234, "compression_loss": 89.2325668334961, "distillation_loss": 3.6938886642456055, "epoch": 3.36, "learning_rate": 3.6888325349863814e-05, "loss": 93.5699, "step": 3975, "task_loss": 2.969996929168701 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8365711196396016, "compression/movement_sparsity/importance_threshold": -0.001144612539906901, "compression/movement_sparsity/linear_layer_sparsity": 0.8203856786167812, "compression/movement_sparsity/model_sparsity": 0.7922029067547479, "compression_loss": 89.25902557373047, "distillation_loss": 4.012026786804199, "epoch": 3.36, "learning_rate": 3.68836291913215e-05, "loss": 92.7972, "step": 3976, "task_loss": 2.1305065155029297 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8368234544591773, "compression/movement_sparsity/importance_threshold": -0.0011428452537448437, "compression/movement_sparsity/linear_layer_sparsity": 0.8207295239147291, "compression/movement_sparsity/model_sparsity": 0.7925349399089193, "compression_loss": 89.28539276123047, "distillation_loss": 3.202460289001465, "epoch": 3.36, "learning_rate": 3.6878933032779186e-05, "loss": 93.0733, "step": 3977, "task_loss": 1.9267168045043945 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8370755294076948, "compression/movement_sparsity/importance_threshold": -0.0011410797876507655, "compression/movement_sparsity/linear_layer_sparsity": 0.8209687107933368, "compression/movement_sparsity/model_sparsity": 0.792765909982422, "compression_loss": 89.31175231933594, "distillation_loss": 3.961599826812744, "epoch": 3.36, "learning_rate": 3.687423687423688e-05, "loss": 93.6401, "step": 3978, "task_loss": 2.6213057041168213 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8373273446190396, "compression/movement_sparsity/importance_threshold": -0.0011393161406869669, "compression/movement_sparsity/linear_layer_sparsity": 0.8211236534275972, "compression/movement_sparsity/model_sparsity": 0.7929155298605363, "compression_loss": 89.33808898925781, "distillation_loss": 5.167476654052734, "epoch": 3.36, "learning_rate": 3.686954071569456e-05, "loss": 93.9582, "step": 3979, "task_loss": 2.4182441234588623 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8375789002270966, "compression/movement_sparsity/importance_threshold": -0.0011375543119157552, "compression/movement_sparsity/linear_layer_sparsity": 0.8212501449978784, "compression/movement_sparsity/model_sparsity": 0.7930376760562448, "compression_loss": 89.36444091796875, "distillation_loss": 3.967257022857666, "epoch": 3.36, "learning_rate": 3.686484455715225e-05, "loss": 93.3218, "step": 3980, "task_loss": 1.9096957445144653 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8378301963657511, "compression/movement_sparsity/importance_threshold": -0.0011357943003994318, "compression/movement_sparsity/linear_layer_sparsity": 0.8214273739014503, "compression/movement_sparsity/model_sparsity": 0.793208816601759, "compression_loss": 89.39071655273438, "distillation_loss": 5.810724258422852, "epoch": 3.36, "learning_rate": 3.686014839860994e-05, "loss": 94.4424, "step": 3981, "task_loss": 2.1504719257354736 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8380812331688884, "compression/movement_sparsity/importance_threshold": -0.0011340361052003005, "compression/movement_sparsity/linear_layer_sparsity": 0.8216943440906497, "compression/movement_sparsity/model_sparsity": 0.793466615543663, "compression_loss": 89.41697692871094, "distillation_loss": 6.392257213592529, "epoch": 3.37, "learning_rate": 3.685545224006763e-05, "loss": 94.1482, "step": 3982, "task_loss": 3.4417433738708496 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8383320107703933, "compression/movement_sparsity/importance_threshold": -0.001132279725380666, "compression/movement_sparsity/linear_layer_sparsity": 0.8218652889578774, "compression/movement_sparsity/model_sparsity": 0.7936316879288136, "compression_loss": 89.4432601928711, "distillation_loss": 5.075407028198242, "epoch": 3.37, "learning_rate": 3.685075608152531e-05, "loss": 94.5565, "step": 3983, "task_loss": 1.8612474203109741 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8385825293041512, "compression/movement_sparsity/importance_threshold": -0.0011305251600028333, "compression/movement_sparsity/linear_layer_sparsity": 0.8219223818725179, "compression/movement_sparsity/model_sparsity": 0.7936868195261978, "compression_loss": 89.46949005126953, "distillation_loss": 4.514585018157959, "epoch": 3.37, "learning_rate": 3.6846059922983e-05, "loss": 93.2395, "step": 3984, "task_loss": 3.5320873260498047 }, { "compression/movement_sparsity/importance_regularization_factor": 0.838832788904047, "compression/movement_sparsity/importance_threshold": -0.0011287724081291061, "compression/movement_sparsity/linear_layer_sparsity": 0.8221175208758787, "compression/movement_sparsity/model_sparsity": 0.7938752549044754, "compression_loss": 89.49565887451172, "distillation_loss": 3.862992763519287, "epoch": 3.37, "learning_rate": 3.684136376444069e-05, "loss": 93.8631, "step": 3985, "task_loss": 2.990511178970337 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8390827897039662, "compression/movement_sparsity/importance_threshold": -0.0011270214688217867, "compression/movement_sparsity/linear_layer_sparsity": 0.8222458606921436, "compression/movement_sparsity/model_sparsity": 0.7939991858532321, "compression_loss": 89.52183532714844, "distillation_loss": 5.26555871963501, "epoch": 3.37, "learning_rate": 3.683666760589838e-05, "loss": 93.784, "step": 3986, "task_loss": 3.657965660095215 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8393325318377935, "compression/movement_sparsity/importance_threshold": -0.0011252723411431806, "compression/movement_sparsity/linear_layer_sparsity": 0.8223731034849858, "compression/movement_sparsity/model_sparsity": 0.7941220574646957, "compression_loss": 89.54790496826172, "distillation_loss": 5.825346946716309, "epoch": 3.37, "learning_rate": 3.683197144735606e-05, "loss": 95.0281, "step": 3987, "task_loss": 2.6280922889709473 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8395820154394146, "compression/movement_sparsity/importance_threshold": -0.001123525024155591, "compression/movement_sparsity/linear_layer_sparsity": 0.8224808383395759, "compression/movement_sparsity/model_sparsity": 0.7942260912955993, "compression_loss": 89.57408905029297, "distillation_loss": 4.264525413513184, "epoch": 3.37, "learning_rate": 3.682727528881375e-05, "loss": 93.5911, "step": 3988, "task_loss": 1.6111263036727905 }, { "compression/movement_sparsity/importance_regularization_factor": 0.839831240642714, "compression/movement_sparsity/importance_threshold": -0.0011217795169213233, "compression/movement_sparsity/linear_layer_sparsity": 0.8226116703068765, "compression/movement_sparsity/model_sparsity": 0.794352428782337, "compression_loss": 89.60014343261719, "distillation_loss": 4.176291465759277, "epoch": 3.37, "learning_rate": 3.682257913027144e-05, "loss": 93.6193, "step": 3989, "task_loss": 2.489731550216675 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8400802075815771, "compression/movement_sparsity/importance_threshold": -0.0011200358185026807, "compression/movement_sparsity/linear_layer_sparsity": 0.8228172906535894, "compression/movement_sparsity/model_sparsity": 0.7945509854375781, "compression_loss": 89.626220703125, "distillation_loss": 5.954404830932617, "epoch": 3.37, "learning_rate": 3.681788297172913e-05, "loss": 94.4639, "step": 3990, "task_loss": 2.7733876705169678 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8403289163898893, "compression/movement_sparsity/importance_threshold": -0.0011182939279619653, "compression/movement_sparsity/linear_layer_sparsity": 0.8230489533824189, "compression/movement_sparsity/model_sparsity": 0.7947746898389945, "compression_loss": 89.65226745605469, "distillation_loss": 5.770582675933838, "epoch": 3.37, "learning_rate": 3.6813186813186815e-05, "loss": 94.5703, "step": 3991, "task_loss": 2.4439711570739746 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8405773672015353, "compression/movement_sparsity/importance_threshold": -0.0011165538443614845, "compression/movement_sparsity/linear_layer_sparsity": 0.8231905528730948, "compression/movement_sparsity/model_sparsity": 0.7949114249515546, "compression_loss": 89.67829895019531, "distillation_loss": 4.157776355743408, "epoch": 3.37, "learning_rate": 3.68084906546445e-05, "loss": 93.804, "step": 3992, "task_loss": 2.197361946105957 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8408255601504008, "compression/movement_sparsity/importance_threshold": -0.0011148155667635388, "compression/movement_sparsity/linear_layer_sparsity": 0.8234133082487004, "compression/movement_sparsity/model_sparsity": 0.7951265279947324, "compression_loss": 89.70433044433594, "distillation_loss": 6.018702507019043, "epoch": 3.38, "learning_rate": 3.680379449610219e-05, "loss": 94.104, "step": 3993, "task_loss": 3.0476107597351074 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8410734953703702, "compression/movement_sparsity/importance_threshold": -0.0011130790942304354, "compression/movement_sparsity/linear_layer_sparsity": 0.8236222673623512, "compression/movement_sparsity/model_sparsity": 0.7953283087199959, "compression_loss": 89.73027801513672, "distillation_loss": 4.065033912658691, "epoch": 3.38, "learning_rate": 3.679909833755988e-05, "loss": 93.8287, "step": 3994, "task_loss": 1.5901000499725342 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8413211729953295, "compression/movement_sparsity/importance_threshold": -0.0011113444258244749, "compression/movement_sparsity/linear_layer_sparsity": 0.8238350660579808, "compression/movement_sparsity/model_sparsity": 0.7955337971257853, "compression_loss": 89.7562484741211, "distillation_loss": 4.049321174621582, "epoch": 3.38, "learning_rate": 3.679440217901757e-05, "loss": 93.8094, "step": 3995, "task_loss": 1.7693833112716675 }, { "compression/movement_sparsity/importance_regularization_factor": 0.841568593159163, "compression/movement_sparsity/importance_threshold": -0.0011096115606079654, "compression/movement_sparsity/linear_layer_sparsity": 0.8239321884033749, "compression/movement_sparsity/model_sparsity": 0.7956275830198318, "compression_loss": 89.78217315673828, "distillation_loss": 3.4872629642486572, "epoch": 3.38, "learning_rate": 3.6789706020475254e-05, "loss": 93.201, "step": 3996, "task_loss": 1.6915392875671387 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8418157559957565, "compression/movement_sparsity/importance_threshold": -0.0011078804976432056, "compression/movement_sparsity/linear_layer_sparsity": 0.8240835776356799, "compression/movement_sparsity/model_sparsity": 0.7957737715662794, "compression_loss": 89.80810546875, "distillation_loss": 5.2877197265625, "epoch": 3.38, "learning_rate": 3.678500986193294e-05, "loss": 94.4685, "step": 3997, "task_loss": 2.872746706008911 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8420626616389947, "compression/movement_sparsity/importance_threshold": -0.0011061512359925056, "compression/movement_sparsity/linear_layer_sparsity": 0.8242061223064736, "compression/movement_sparsity/model_sparsity": 0.7958921064506399, "compression_loss": 89.83394622802734, "distillation_loss": 3.74898099899292, "epoch": 3.38, "learning_rate": 3.6780313703390626e-05, "loss": 93.6162, "step": 3998, "task_loss": 2.2747697830200195 }, { "compression/movement_sparsity/importance_regularization_factor": 0.842309310222763, "compression/movement_sparsity/importance_threshold": -0.0011044237747181648, "compression/movement_sparsity/linear_layer_sparsity": 0.8243634020775906, "compression/movement_sparsity/model_sparsity": 0.7960439831777698, "compression_loss": 89.85975646972656, "distillation_loss": 6.204662322998047, "epoch": 3.38, "learning_rate": 3.677561754484832e-05, "loss": 93.4307, "step": 3999, "task_loss": 2.6401376724243164 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8425557018809464, "compression/movement_sparsity/importance_threshold": -0.0011026981128824881, "compression/movement_sparsity/linear_layer_sparsity": 0.8245461757191132, "compression/movement_sparsity/model_sparsity": 0.7962204779824286, "compression_loss": 89.88553619384766, "distillation_loss": 4.542688369750977, "epoch": 3.38, "learning_rate": 3.6770921386306e-05, "loss": 93.7523, "step": 4000, "task_loss": 2.1997807025909424 }, { "epoch": 3.38, "eval_accuracy": 0.5992475247524752, "eval_loss": 93.37529754638672, "eval_runtime": 311.0661, "eval_samples_per_second": 81.172, "eval_steps_per_second": 0.637, "step": 4000 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8428018367474303, "compression/movement_sparsity/importance_threshold": -0.0011009742495477802, "compression/movement_sparsity/linear_layer_sparsity": 0.8247485288438937, "compression/movement_sparsity/model_sparsity": 0.7964158796548619, "compression_loss": 89.91133117675781, "distillation_loss": 4.378180503845215, "epoch": 3.38, "learning_rate": 3.676622522776369e-05, "loss": 93.3666, "step": 4001, "task_loss": 1.712186336517334 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8430477149560993, "compression/movement_sparsity/importance_threshold": -0.001099252183776346, "compression/movement_sparsity/linear_layer_sparsity": 0.8249674565616882, "compression/movement_sparsity/model_sparsity": 0.7966272865320497, "compression_loss": 89.93704986572266, "distillation_loss": 5.9553751945495605, "epoch": 3.38, "learning_rate": 3.676152906922138e-05, "loss": 94.2253, "step": 4002, "task_loss": 4.005069732666016 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8432933366408389, "compression/movement_sparsity/importance_threshold": -0.0010975319146304882, "compression/movement_sparsity/linear_layer_sparsity": 0.8251431114751321, "compression/movement_sparsity/model_sparsity": 0.7967969071588391, "compression_loss": 89.9627456665039, "distillation_loss": 3.6137094497680664, "epoch": 3.38, "learning_rate": 3.6756832910679065e-05, "loss": 94.4025, "step": 4003, "task_loss": 2.1843485832214355 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8435387019355345, "compression/movement_sparsity/importance_threshold": -0.0010958134411725102, "compression/movement_sparsity/linear_layer_sparsity": 0.8253232021789365, "compression/movement_sparsity/model_sparsity": 0.796970811192944, "compression_loss": 89.98843383789062, "distillation_loss": 4.9405717849731445, "epoch": 3.38, "learning_rate": 3.675213675213676e-05, "loss": 94.1285, "step": 4004, "task_loss": 2.347391366958618 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8437838109740706, "compression/movement_sparsity/importance_threshold": -0.0010940967624647182, "compression/movement_sparsity/linear_layer_sparsity": 0.8254915118051167, "compression/movement_sparsity/model_sparsity": 0.7971333388656839, "compression_loss": 90.01409912109375, "distillation_loss": 4.22064208984375, "epoch": 3.39, "learning_rate": 3.674744059359444e-05, "loss": 93.6736, "step": 4005, "task_loss": 2.274905204772949 }, { "compression/movement_sparsity/importance_regularization_factor": 0.844028663890333, "compression/movement_sparsity/importance_threshold": -0.0010923818775694129, "compression/movement_sparsity/linear_layer_sparsity": 0.8257831292488194, "compression/movement_sparsity/model_sparsity": 0.7974149383530753, "compression_loss": 90.03973388671875, "distillation_loss": 4.685396671295166, "epoch": 3.39, "learning_rate": 3.674274443505213e-05, "loss": 93.5858, "step": 4006, "task_loss": 1.545034408569336 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8442732608182064, "compression/movement_sparsity/importance_threshold": -0.0010906687855489006, "compression/movement_sparsity/linear_layer_sparsity": 0.8260030466725277, "compression/movement_sparsity/model_sparsity": 0.797627300936734, "compression_loss": 90.06533813476562, "distillation_loss": 3.726595163345337, "epoch": 3.39, "learning_rate": 3.673804827650982e-05, "loss": 94.2731, "step": 4007, "task_loss": 2.017807960510254 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8445176018915762, "compression/movement_sparsity/importance_threshold": -0.0010889574854654844, "compression/movement_sparsity/linear_layer_sparsity": 0.8260976649427182, "compression/movement_sparsity/model_sparsity": 0.7977186687782637, "compression_loss": 90.09091186523438, "distillation_loss": 4.626942157745361, "epoch": 3.39, "learning_rate": 3.6733352117967503e-05, "loss": 95.0854, "step": 4008, "task_loss": 2.6714439392089844 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8447616872443272, "compression/movement_sparsity/importance_threshold": -0.00108724797638147, "compression/movement_sparsity/linear_layer_sparsity": 0.8263725050825572, "compression/movement_sparsity/model_sparsity": 0.797984067313792, "compression_loss": 90.11647033691406, "distillation_loss": 5.864928245544434, "epoch": 3.39, "learning_rate": 3.672865595942519e-05, "loss": 93.9198, "step": 4009, "task_loss": 3.1061229705810547 }, { "compression/movement_sparsity/importance_regularization_factor": 0.845005517010345, "compression/movement_sparsity/importance_threshold": -0.0010855402573591586, "compression/movement_sparsity/linear_layer_sparsity": 0.826501011837169, "compression/movement_sparsity/model_sparsity": 0.7981081594660497, "compression_loss": 90.1419448852539, "distillation_loss": 4.828771114349365, "epoch": 3.39, "learning_rate": 3.6723959800882876e-05, "loss": 94.4144, "step": 4010, "task_loss": 1.9714725017547607 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8452490913235142, "compression/movement_sparsity/importance_threshold": -0.0010838343274608576, "compression/movement_sparsity/linear_layer_sparsity": 0.8266318676528048, "compression/movement_sparsity/model_sparsity": 0.7982345199818591, "compression_loss": 90.16741943359375, "distillation_loss": 4.570655822753906, "epoch": 3.39, "learning_rate": 3.671926364234057e-05, "loss": 94.5963, "step": 4011, "task_loss": 3.6325502395629883 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8454924103177204, "compression/movement_sparsity/importance_threshold": -0.0010821301857488683, "compression/movement_sparsity/linear_layer_sparsity": 0.8268041837993108, "compression/movement_sparsity/model_sparsity": 0.798400916538626, "compression_loss": 90.19284057617188, "distillation_loss": 3.2912402153015137, "epoch": 3.39, "learning_rate": 3.6714567483798256e-05, "loss": 94.4459, "step": 4012, "task_loss": 2.136094570159912 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8457354741268487, "compression/movement_sparsity/importance_threshold": -0.0010804278312854937, "compression/movement_sparsity/linear_layer_sparsity": 0.8271148918353984, "compression/movement_sparsity/model_sparsity": 0.7987009507978243, "compression_loss": 90.21829986572266, "distillation_loss": 5.338934898376465, "epoch": 3.39, "learning_rate": 3.670987132525594e-05, "loss": 93.4635, "step": 4013, "task_loss": 2.87656831741333 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8459782828847839, "compression/movement_sparsity/importance_threshold": -0.0010787272631330412, "compression/movement_sparsity/linear_layer_sparsity": 0.827284465423348, "compression/movement_sparsity/model_sparsity": 0.7988646990113585, "compression_loss": 90.24361419677734, "distillation_loss": 3.7629055976867676, "epoch": 3.39, "learning_rate": 3.670517516671363e-05, "loss": 94.7441, "step": 4014, "task_loss": 2.2653439044952393 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8462208367254116, "compression/movement_sparsity/importance_threshold": -0.0010770284803538113, "compression/movement_sparsity/linear_layer_sparsity": 0.8274868543206315, "compression/movement_sparsity/model_sparsity": 0.7990601352273993, "compression_loss": 90.26899719238281, "distillation_loss": 3.008218765258789, "epoch": 3.39, "learning_rate": 3.6700479008171315e-05, "loss": 93.7245, "step": 4015, "task_loss": 2.6759941577911377 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8464631357826164, "compression/movement_sparsity/importance_threshold": -0.0010753314820101112, "compression/movement_sparsity/linear_layer_sparsity": 0.8278032978813521, "compression/movement_sparsity/model_sparsity": 0.7993657079783147, "compression_loss": 90.29431915283203, "distillation_loss": 5.495811462402344, "epoch": 3.39, "learning_rate": 3.669578284962901e-05, "loss": 94.644, "step": 4016, "task_loss": 3.655524492263794 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8467051801902841, "compression/movement_sparsity/importance_threshold": -0.0010736362671642423, "compression/movement_sparsity/linear_layer_sparsity": 0.8279653473195234, "compression/movement_sparsity/model_sparsity": 0.7995221905197626, "compression_loss": 90.31966400146484, "distillation_loss": 4.061117172241211, "epoch": 3.4, "learning_rate": 3.669108669108669e-05, "loss": 94.937, "step": 4017, "task_loss": 3.019972801208496 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8469469700822992, "compression/movement_sparsity/importance_threshold": -0.0010719428348785111, "compression/movement_sparsity/linear_layer_sparsity": 0.8280224640824992, "compression/movement_sparsity/model_sparsity": 0.7995773451462185, "compression_loss": 90.34500885009766, "distillation_loss": 4.476384162902832, "epoch": 3.4, "learning_rate": 3.668639053254438e-05, "loss": 94.8489, "step": 4018, "task_loss": 2.9321508407592773 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8471885055925473, "compression/movement_sparsity/importance_threshold": -0.001070251184215219, "compression/movement_sparsity/linear_layer_sparsity": 0.828236598284904, "compression/movement_sparsity/model_sparsity": 0.7997841231800168, "compression_loss": 90.37025451660156, "distillation_loss": 4.159713268280029, "epoch": 3.4, "learning_rate": 3.668169437400207e-05, "loss": 94.3754, "step": 4019, "task_loss": 2.629488945007324 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8474297868549132, "compression/movement_sparsity/importance_threshold": -0.0010685613142366714, "compression/movement_sparsity/linear_layer_sparsity": 0.8284583878029311, "compression/movement_sparsity/model_sparsity": 0.7999982935457952, "compression_loss": 90.39547729492188, "distillation_loss": 4.1260576248168945, "epoch": 3.4, "learning_rate": 3.667699821545976e-05, "loss": 94.1859, "step": 4020, "task_loss": 1.9738210439682007 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8476708140032823, "compression/movement_sparsity/importance_threshold": -0.0010668732240051724, "compression/movement_sparsity/linear_layer_sparsity": 0.8286815724485715, "compression/movement_sparsity/model_sparsity": 0.8002138111122616, "compression_loss": 90.42064666748047, "distillation_loss": 3.8680572509765625, "epoch": 3.4, "learning_rate": 3.6672302056917446e-05, "loss": 94.9111, "step": 4021, "task_loss": 2.539458990097046 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8479115871715397, "compression/movement_sparsity/importance_threshold": -0.0010651869125830249, "compression/movement_sparsity/linear_layer_sparsity": 0.8288818269198482, "compression/movement_sparsity/model_sparsity": 0.8004071862263952, "compression_loss": 90.44585418701172, "distillation_loss": 3.463296890258789, "epoch": 3.4, "learning_rate": 3.666760589837513e-05, "loss": 94.6061, "step": 4022, "task_loss": 1.7755612134933472 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8481521064935703, "compression/movement_sparsity/importance_threshold": -0.0010635023790325345, "compression/movement_sparsity/linear_layer_sparsity": 0.8291031394711699, "compression/movement_sparsity/model_sparsity": 0.8006208960107418, "compression_loss": 90.47101593017578, "distillation_loss": 2.89973783493042, "epoch": 3.4, "learning_rate": 3.666290973983282e-05, "loss": 93.7827, "step": 4023, "task_loss": 1.9414618015289307 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8483923721032597, "compression/movement_sparsity/importance_threshold": -0.0010618196224160027, "compression/movement_sparsity/linear_layer_sparsity": 0.8293222102789759, "compression/movement_sparsity/model_sparsity": 0.8008324410623592, "compression_loss": 90.49610900878906, "distillation_loss": 4.550585746765137, "epoch": 3.4, "learning_rate": 3.6658213581290505e-05, "loss": 94.8838, "step": 4024, "task_loss": 3.320986747741699 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8486323841344925, "compression/movement_sparsity/importance_threshold": -0.0010601386417957367, "compression/movement_sparsity/linear_layer_sparsity": 0.8294404622494208, "compression/movement_sparsity/model_sparsity": 0.8009466307138337, "compression_loss": 90.52119445800781, "distillation_loss": 3.6590003967285156, "epoch": 3.4, "learning_rate": 3.66535174227482e-05, "loss": 94.7263, "step": 4025, "task_loss": 1.0616800785064697 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8488721427211543, "compression/movement_sparsity/importance_threshold": -0.001058459436234038, "compression/movement_sparsity/linear_layer_sparsity": 0.8296288044772292, "compression/movement_sparsity/model_sparsity": 0.8011285028067084, "compression_loss": 90.5462646484375, "distillation_loss": 3.410371780395508, "epoch": 3.4, "learning_rate": 3.664882126420588e-05, "loss": 94.5795, "step": 4026, "task_loss": 2.033132791519165 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8491116479971299, "compression/movement_sparsity/importance_threshold": -0.0010567820047932119, "compression/movement_sparsity/linear_layer_sparsity": 0.8297351442042058, "compression/movement_sparsity/model_sparsity": 0.8012311894369241, "compression_loss": 90.5712890625, "distillation_loss": 4.2272820472717285, "epoch": 3.4, "learning_rate": 3.664412510566357e-05, "loss": 95.4442, "step": 4027, "task_loss": 3.7124855518341064 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8493509000963049, "compression/movement_sparsity/importance_threshold": -0.00105510634653556, "compression/movement_sparsity/linear_layer_sparsity": 0.8298981237274529, "compression/movement_sparsity/model_sparsity": 0.8013885701121639, "compression_loss": 90.59623718261719, "distillation_loss": 3.327826976776123, "epoch": 3.4, "learning_rate": 3.663942894712126e-05, "loss": 94.3846, "step": 4028, "task_loss": 1.7476540803909302 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8495898991525638, "compression/movement_sparsity/importance_threshold": -0.0010534324605233894, "compression/movement_sparsity/linear_layer_sparsity": 0.8300068125154537, "compression/movement_sparsity/model_sparsity": 0.8014935251059312, "compression_loss": 90.62123107910156, "distillation_loss": 4.16901969909668, "epoch": 3.41, "learning_rate": 3.6634732788578944e-05, "loss": 94.4001, "step": 4029, "task_loss": 2.4603734016418457 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8498286452997922, "compression/movement_sparsity/importance_threshold": -0.0010517603458190024, "compression/movement_sparsity/linear_layer_sparsity": 0.8302250009348547, "compression/movement_sparsity/model_sparsity": 0.8017042180818996, "compression_loss": 90.64610290527344, "distillation_loss": 4.509100914001465, "epoch": 3.41, "learning_rate": 3.663003663003663e-05, "loss": 94.8197, "step": 4030, "task_loss": 2.9440319538116455 }, { "compression/movement_sparsity/importance_regularization_factor": 0.850067138671875, "compression/movement_sparsity/importance_threshold": -0.0010500900014847048, "compression/movement_sparsity/linear_layer_sparsity": 0.8305486705411627, "compression/movement_sparsity/model_sparsity": 0.8020167686415066, "compression_loss": 90.67100524902344, "distillation_loss": 5.5777997970581055, "epoch": 3.41, "learning_rate": 3.6625340471494316e-05, "loss": 95.1193, "step": 4031, "task_loss": 2.953101873397827 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8503053794026975, "compression/movement_sparsity/importance_threshold": -0.0010484214265827985, "compression/movement_sparsity/linear_layer_sparsity": 0.8307230137961665, "compression/movement_sparsity/model_sparsity": 0.8021851226693586, "compression_loss": 90.69589233398438, "distillation_loss": 4.493691921234131, "epoch": 3.41, "learning_rate": 3.662064431295201e-05, "loss": 94.8027, "step": 4032, "task_loss": 3.440406322479248 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8505433676261449, "compression/movement_sparsity/importance_threshold": -0.0010467546201755875, "compression/movement_sparsity/linear_layer_sparsity": 0.8308071626471728, "compression/movement_sparsity/model_sparsity": 0.8022663807484607, "compression_loss": 90.72071838378906, "distillation_loss": 4.116029262542725, "epoch": 3.41, "learning_rate": 3.6615948154409696e-05, "loss": 95.1318, "step": 4033, "task_loss": 1.9901171922683716 }, { "compression/movement_sparsity/importance_regularization_factor": 0.850781103476102, "compression/movement_sparsity/importance_threshold": -0.0010450895813253775, "compression/movement_sparsity/linear_layer_sparsity": 0.8310351288840353, "compression/movement_sparsity/model_sparsity": 0.8024865156437807, "compression_loss": 90.74551391601562, "distillation_loss": 3.526010036468506, "epoch": 3.41, "learning_rate": 3.661125199586738e-05, "loss": 95.1513, "step": 4034, "task_loss": 3.065296173095703 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8510185870864545, "compression/movement_sparsity/importance_threshold": -0.0010434263090944697, "compression/movement_sparsity/linear_layer_sparsity": 0.8311413612935031, "compression/movement_sparsity/model_sparsity": 0.8025890986431743, "compression_loss": 90.77027130126953, "distillation_loss": 3.5066988468170166, "epoch": 3.41, "learning_rate": 3.660655583732507e-05, "loss": 94.3973, "step": 4035, "task_loss": 2.283299446105957 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8512558185910869, "compression/movement_sparsity/importance_threshold": -0.0010417648025451715, "compression/movement_sparsity/linear_layer_sparsity": 0.8312788946430152, "compression/movement_sparsity/model_sparsity": 0.8027219072990286, "compression_loss": 90.7950210571289, "distillation_loss": 4.981856346130371, "epoch": 3.41, "learning_rate": 3.6601859678782755e-05, "loss": 94.8677, "step": 4036, "task_loss": 3.1459848880767822 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8514927981238849, "compression/movement_sparsity/importance_threshold": -0.0010401050607397842, "compression/movement_sparsity/linear_layer_sparsity": 0.8314839307055139, "compression/movement_sparsity/model_sparsity": 0.8029198997420157, "compression_loss": 90.81974029541016, "distillation_loss": 2.9590530395507812, "epoch": 3.41, "learning_rate": 3.659716352024045e-05, "loss": 94.5023, "step": 4037, "task_loss": 1.815351963043213 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8517295258187332, "compression/movement_sparsity/importance_threshold": -0.0010384470827406135, "compression/movement_sparsity/linear_layer_sparsity": 0.8317092855496641, "compression/movement_sparsity/model_sparsity": 0.8031375129539967, "compression_loss": 90.84449005126953, "distillation_loss": 5.350484371185303, "epoch": 3.41, "learning_rate": 3.6592467361698134e-05, "loss": 95.3318, "step": 4038, "task_loss": 3.2526042461395264 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8519660018095174, "compression/movement_sparsity/importance_threshold": -0.0010367908676099605, "compression/movement_sparsity/linear_layer_sparsity": 0.8318864071357271, "compression/movement_sparsity/model_sparsity": 0.8033085498686888, "compression_loss": 90.8691177368164, "distillation_loss": 3.8340611457824707, "epoch": 3.41, "learning_rate": 3.658777120315582e-05, "loss": 94.924, "step": 4039, "task_loss": 2.467369794845581 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8522022262301221, "compression/movement_sparsity/importance_threshold": -0.0010351364144101337, "compression/movement_sparsity/linear_layer_sparsity": 0.8320084271431449, "compression/movement_sparsity/model_sparsity": 0.8034263781134744, "compression_loss": 90.89376831054688, "distillation_loss": 3.892963409423828, "epoch": 3.41, "learning_rate": 3.658307504461351e-05, "loss": 94.4309, "step": 4040, "task_loss": 1.9918533563613892 }, { "compression/movement_sparsity/importance_regularization_factor": 0.852438199214433, "compression/movement_sparsity/importance_threshold": -0.0010334837222034333, "compression/movement_sparsity/linear_layer_sparsity": 0.8321930013339804, "compression/movement_sparsity/model_sparsity": 0.8036046116130381, "compression_loss": 90.91842651367188, "distillation_loss": 4.417028903961182, "epoch": 3.42, "learning_rate": 3.657837888607119e-05, "loss": 94.5218, "step": 4041, "task_loss": 2.805022716522217 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8526739208963346, "compression/movement_sparsity/importance_threshold": -0.001031832790052166, "compression/movement_sparsity/linear_layer_sparsity": 0.8324261307354293, "compression/movement_sparsity/model_sparsity": 0.8038297323023571, "compression_loss": 90.9429702758789, "distillation_loss": 4.495639801025391, "epoch": 3.42, "learning_rate": 3.6573682727528886e-05, "loss": 94.9987, "step": 4042, "task_loss": 1.9722598791122437 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8529093914097126, "compression/movement_sparsity/importance_threshold": -0.0010301836170186337, "compression/movement_sparsity/linear_layer_sparsity": 0.8326292350827709, "compression/movement_sparsity/model_sparsity": 0.8040258593905455, "compression_loss": 90.967529296875, "distillation_loss": 4.092824935913086, "epoch": 3.42, "learning_rate": 3.6568986568986566e-05, "loss": 95.24, "step": 4043, "task_loss": 2.367692232131958 }, { "compression/movement_sparsity/importance_regularization_factor": 0.853144610888452, "compression/movement_sparsity/importance_threshold": -0.0010285362021651406, "compression/movement_sparsity/linear_layer_sparsity": 0.832686375694082, "compression/movement_sparsity/model_sparsity": 0.8040810370460729, "compression_loss": 90.99202728271484, "distillation_loss": 4.864141464233398, "epoch": 3.42, "learning_rate": 3.656429041044426e-05, "loss": 95.4717, "step": 4044, "task_loss": 2.0035738945007324 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8533795794664376, "compression/movement_sparsity/importance_threshold": -0.001026890544553993, "compression/movement_sparsity/linear_layer_sparsity": 0.8328888719088742, "compression/movement_sparsity/model_sparsity": 0.8042765768929359, "compression_loss": 91.01655578613281, "distillation_loss": 4.6106977462768555, "epoch": 3.42, "learning_rate": 3.6559594251901945e-05, "loss": 95.2744, "step": 4045, "task_loss": 2.313793897628784 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8536142972775551, "compression/movement_sparsity/importance_threshold": -0.0010252466432474914, "compression/movement_sparsity/linear_layer_sparsity": 0.8329748213091934, "compression/movement_sparsity/model_sparsity": 0.8043595736669429, "compression_loss": 91.04108428955078, "distillation_loss": 4.225437164306641, "epoch": 3.42, "learning_rate": 3.655489809335964e-05, "loss": 94.9898, "step": 4046, "task_loss": 3.497863531112671 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8538487644556892, "compression/movement_sparsity/importance_threshold": -0.0010236044973079414, "compression/movement_sparsity/linear_layer_sparsity": 0.833165619915535, "compression/movement_sparsity/model_sparsity": 0.8045438177541914, "compression_loss": 91.06556701660156, "distillation_loss": 4.941924095153809, "epoch": 3.42, "learning_rate": 3.655020193481732e-05, "loss": 95.3346, "step": 4047, "task_loss": 2.1771092414855957 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8540829811347255, "compression/movement_sparsity/importance_threshold": -0.0010219641057976469, "compression/movement_sparsity/linear_layer_sparsity": 0.8334211906004744, "compression/movement_sparsity/model_sparsity": 0.8047906087998757, "compression_loss": 91.08998107910156, "distillation_loss": 5.057939052581787, "epoch": 3.42, "learning_rate": 3.6545505776275004e-05, "loss": 95.9447, "step": 4048, "task_loss": 3.3483328819274902 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8543169474485485, "compression/movement_sparsity/importance_threshold": -0.0010203254677789136, "compression/movement_sparsity/linear_layer_sparsity": 0.833557758092408, "compression/movement_sparsity/model_sparsity": 0.8049224847783306, "compression_loss": 91.11447143554688, "distillation_loss": 6.482856750488281, "epoch": 3.42, "learning_rate": 3.65408096177327e-05, "loss": 95.3204, "step": 4049, "task_loss": 3.5287153720855713 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8545506635310439, "compression/movement_sparsity/importance_threshold": -0.001018688582314041, "compression/movement_sparsity/linear_layer_sparsity": 0.8337770435352314, "compression/movement_sparsity/model_sparsity": 0.8051342370915923, "compression_loss": 91.13887023925781, "distillation_loss": 3.861699104309082, "epoch": 3.42, "learning_rate": 3.6536113459190384e-05, "loss": 95.8628, "step": 4050, "task_loss": 2.765523910522461 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8547841295160965, "compression/movement_sparsity/importance_threshold": -0.0010170534484653382, "compression/movement_sparsity/linear_layer_sparsity": 0.8339206582102378, "compression/movement_sparsity/model_sparsity": 0.8052729181607017, "compression_loss": 91.16327667236328, "distillation_loss": 4.048453330993652, "epoch": 3.42, "learning_rate": 3.653141730064808e-05, "loss": 95.7282, "step": 4051, "task_loss": 3.1812658309936523 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8550173455375915, "compression/movement_sparsity/importance_threshold": -0.0010154200652951074, "compression/movement_sparsity/linear_layer_sparsity": 0.8340673016238236, "compression/movement_sparsity/model_sparsity": 0.8054145239219029, "compression_loss": 91.18772888183594, "distillation_loss": 3.323960304260254, "epoch": 3.42, "learning_rate": 3.6526721142105757e-05, "loss": 94.4332, "step": 4052, "task_loss": 2.219477415084839 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8552503117294142, "compression/movement_sparsity/importance_threshold": -0.0010137884318656507, "compression/movement_sparsity/linear_layer_sparsity": 0.8342724688521662, "compression/movement_sparsity/model_sparsity": 0.8056126430247839, "compression_loss": 91.21202850341797, "distillation_loss": 4.079426288604736, "epoch": 3.43, "learning_rate": 3.652202498356345e-05, "loss": 94.9351, "step": 4053, "task_loss": 2.078655958175659 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8554830282254496, "compression/movement_sparsity/importance_threshold": -0.0010121585472392747, "compression/movement_sparsity/linear_layer_sparsity": 0.8344894767789713, "compression/movement_sparsity/model_sparsity": 0.8058221960617087, "compression_loss": 91.2363052368164, "distillation_loss": 5.734855651855469, "epoch": 3.43, "learning_rate": 3.6517328825021136e-05, "loss": 96.0173, "step": 4054, "task_loss": 2.78306245803833 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8557154951595831, "compression/movement_sparsity/importance_threshold": -0.0010105304104782806, "compression/movement_sparsity/linear_layer_sparsity": 0.8346943935997937, "compression/movement_sparsity/model_sparsity": 0.8060200733593378, "compression_loss": 91.26063537597656, "distillation_loss": 3.758881092071533, "epoch": 3.43, "learning_rate": 3.651263266647882e-05, "loss": 95.4428, "step": 4055, "task_loss": 2.015864133834839 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8559477126656994, "compression/movement_sparsity/importance_threshold": -0.0010089040206449759, "compression/movement_sparsity/linear_layer_sparsity": 0.8349092790247595, "compression/movement_sparsity/model_sparsity": 0.8062275768088912, "compression_loss": 91.28484344482422, "distillation_loss": 3.075552463531494, "epoch": 3.43, "learning_rate": 3.650793650793651e-05, "loss": 95.2995, "step": 4056, "task_loss": 1.9622286558151245 }, { "compression/movement_sparsity/importance_regularization_factor": 0.856179680877684, "compression/movement_sparsity/importance_threshold": -0.00100727937680166, "compression/movement_sparsity/linear_layer_sparsity": 0.8350782564043273, "compression/movement_sparsity/model_sparsity": 0.8063907492956357, "compression_loss": 91.30905151367188, "distillation_loss": 3.255462646484375, "epoch": 3.43, "learning_rate": 3.6503240349394195e-05, "loss": 94.978, "step": 4057, "task_loss": 1.6003787517547607 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8564113999294217, "compression/movement_sparsity/importance_threshold": -0.001005656478010642, "compression/movement_sparsity/linear_layer_sparsity": 0.8352217876101602, "compression/movement_sparsity/model_sparsity": 0.8065293497629946, "compression_loss": 91.3332290649414, "distillation_loss": 5.117107391357422, "epoch": 3.43, "learning_rate": 3.649854419085189e-05, "loss": 95.4985, "step": 4058, "task_loss": 3.1110661029815674 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8566428699547981, "compression/movement_sparsity/importance_threshold": -0.0010040353233342217, "compression/movement_sparsity/linear_layer_sparsity": 0.8354285526769661, "compression/movement_sparsity/model_sparsity": 0.806729011813672, "compression_loss": 91.3573226928711, "distillation_loss": 5.257266044616699, "epoch": 3.43, "learning_rate": 3.6493848032309575e-05, "loss": 95.7893, "step": 4059, "task_loss": 3.6231908798217773 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8568740910876979, "compression/movement_sparsity/importance_threshold": -0.0010024159118347071, "compression/movement_sparsity/linear_layer_sparsity": 0.8355146093947942, "compression/movement_sparsity/model_sparsity": 0.8068121122185011, "compression_loss": 91.38145446777344, "distillation_loss": 3.8260996341705322, "epoch": 3.43, "learning_rate": 3.648915187376726e-05, "loss": 94.9752, "step": 4060, "task_loss": 1.9420841932296753 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8571050634620064, "compression/movement_sparsity/importance_threshold": -0.0010007982425743979, "compression/movement_sparsity/linear_layer_sparsity": 0.8355958129763943, "compression/movement_sparsity/model_sparsity": 0.806890526207262, "compression_loss": 91.40552520751953, "distillation_loss": 6.000916481018066, "epoch": 3.43, "learning_rate": 3.648445571522495e-05, "loss": 95.8092, "step": 4061, "task_loss": 1.9992434978485107 }, { "compression/movement_sparsity/importance_regularization_factor": 0.857335787211609, "compression/movement_sparsity/importance_threshold": -0.0009991823146156004, "compression/movement_sparsity/linear_layer_sparsity": 0.8357542374676045, "compression/movement_sparsity/model_sparsity": 0.8070435083298282, "compression_loss": 91.42961120605469, "distillation_loss": 5.9295501708984375, "epoch": 3.43, "learning_rate": 3.6479759556682634e-05, "loss": 95.7548, "step": 4062, "task_loss": 2.9101176261901855 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8575662624703904, "compression/movement_sparsity/importance_threshold": -0.0009975681270206187, "compression/movement_sparsity/linear_layer_sparsity": 0.8359264701449368, "compression/movement_sparsity/model_sparsity": 0.8072098242848446, "compression_loss": 91.45365142822266, "distillation_loss": 4.540140151977539, "epoch": 3.43, "learning_rate": 3.647506339814033e-05, "loss": 95.8558, "step": 4063, "task_loss": 3.298372745513916 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8577964893722361, "compression/movement_sparsity/importance_threshold": -0.0009959556788517558, "compression/movement_sparsity/linear_layer_sparsity": 0.8360303892658834, "compression/movement_sparsity/model_sparsity": 0.807310173464294, "compression_loss": 91.4776611328125, "distillation_loss": 5.1201324462890625, "epoch": 3.44, "learning_rate": 3.6470367239598006e-05, "loss": 95.8437, "step": 4064, "task_loss": 2.2403018474578857 }, { "compression/movement_sparsity/importance_regularization_factor": 0.858026468051031, "compression/movement_sparsity/importance_threshold": -0.0009943449691713156, "compression/movement_sparsity/linear_layer_sparsity": 0.8362778038201599, "compression/movement_sparsity/model_sparsity": 0.8075490885674949, "compression_loss": 91.50166320800781, "distillation_loss": 4.3699870109558105, "epoch": 3.44, "learning_rate": 3.64656710810557e-05, "loss": 95.6369, "step": 4065, "task_loss": 2.469433069229126 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8582561986406604, "compression/movement_sparsity/importance_threshold": -0.0009927359970416036, "compression/movement_sparsity/linear_layer_sparsity": 0.8364019701777521, "compression/movement_sparsity/model_sparsity": 0.8076689894287234, "compression_loss": 91.525634765625, "distillation_loss": 5.020072937011719, "epoch": 3.44, "learning_rate": 3.6460974922513386e-05, "loss": 95.528, "step": 4066, "task_loss": 3.185976505279541 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8584856812750092, "compression/movement_sparsity/importance_threshold": -0.000991128761524923, "compression/movement_sparsity/linear_layer_sparsity": 0.8365673703070292, "compression/movement_sparsity/model_sparsity": 0.8078287075547296, "compression_loss": 91.54962158203125, "distillation_loss": 4.111250400543213, "epoch": 3.44, "learning_rate": 3.645627876397107e-05, "loss": 96.1807, "step": 4067, "task_loss": 3.4712131023406982 }, { "compression/movement_sparsity/importance_regularization_factor": 0.858714916087963, "compression/movement_sparsity/importance_threshold": -0.0009895232616835758, "compression/movement_sparsity/linear_layer_sparsity": 0.8366727799489302, "compression/movement_sparsity/model_sparsity": 0.8079304960511533, "compression_loss": 91.57353210449219, "distillation_loss": 4.172990798950195, "epoch": 3.44, "learning_rate": 3.6451582605428765e-05, "loss": 95.7808, "step": 4068, "task_loss": 2.247328042984009 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8589439032134064, "compression/movement_sparsity/importance_threshold": -0.0009879194965798704, "compression/movement_sparsity/linear_layer_sparsity": 0.8368362364388826, "compression/movement_sparsity/model_sparsity": 0.8080883373078248, "compression_loss": 91.5974349975586, "distillation_loss": 5.47062873840332, "epoch": 3.44, "learning_rate": 3.6446886446886445e-05, "loss": 95.2017, "step": 4069, "task_loss": 2.432814836502075 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8591726427852251, "compression/movement_sparsity/importance_threshold": -0.0009863174652761054, "compression/movement_sparsity/linear_layer_sparsity": 0.8370138826883217, "compression/movement_sparsity/model_sparsity": 0.8082598808620919, "compression_loss": 91.62129974365234, "distillation_loss": 4.852380752563477, "epoch": 3.44, "learning_rate": 3.644219028834414e-05, "loss": 95.8271, "step": 4070, "task_loss": 2.304025411605835 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8594011349373036, "compression/movement_sparsity/importance_threshold": -0.000984717166834589, "compression/movement_sparsity/linear_layer_sparsity": 0.8372518294534954, "compression/movement_sparsity/model_sparsity": 0.808489653423872, "compression_loss": 91.6451416015625, "distillation_loss": 8.230361938476562, "epoch": 3.44, "learning_rate": 3.6437494129801824e-05, "loss": 96.9659, "step": 4071, "task_loss": 5.1191277503967285 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8596293798035276, "compression/movement_sparsity/importance_threshold": -0.0009831186003176236, "compression/movement_sparsity/linear_layer_sparsity": 0.8373328184000781, "compression/movement_sparsity/model_sparsity": 0.8085678601509885, "compression_loss": 91.66901397705078, "distillation_loss": 4.487608909606934, "epoch": 3.44, "learning_rate": 3.643279797125951e-05, "loss": 96.3496, "step": 4072, "task_loss": 2.969597101211548 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8598573775177818, "compression/movement_sparsity/importance_threshold": -0.0009815217647875145, "compression/movement_sparsity/linear_layer_sparsity": 0.8375153416340803, "compression/movement_sparsity/model_sparsity": 0.8087441131503955, "compression_loss": 91.69283294677734, "distillation_loss": 5.0012526512146, "epoch": 3.44, "learning_rate": 3.64281018127172e-05, "loss": 96.6891, "step": 4073, "task_loss": 2.7415690422058105 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8600851282139517, "compression/movement_sparsity/importance_threshold": -0.0009799266593065632, "compression/movement_sparsity/linear_layer_sparsity": 0.8376961477879429, "compression/movement_sparsity/model_sparsity": 0.8089187080566481, "compression_loss": 91.71662902832031, "distillation_loss": 3.738043785095215, "epoch": 3.44, "learning_rate": 3.642340565417488e-05, "loss": 96.1812, "step": 4074, "task_loss": 2.1282966136932373 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8603126320259223, "compression/movement_sparsity/importance_threshold": -0.0009783332829370744, "compression/movement_sparsity/linear_layer_sparsity": 0.8379011123054357, "compression/movement_sparsity/model_sparsity": 0.8091166314124205, "compression_loss": 91.74044036865234, "distillation_loss": 4.446313858032227, "epoch": 3.44, "learning_rate": 3.6418709495632576e-05, "loss": 96.1875, "step": 4075, "task_loss": 2.660594940185547 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8605398890875786, "compression/movement_sparsity/importance_threshold": -0.0009767416347413547, "compression/movement_sparsity/linear_layer_sparsity": 0.8379707852169321, "compression/movement_sparsity/model_sparsity": 0.809183910845068, "compression_loss": 91.76414489746094, "distillation_loss": 2.522665500640869, "epoch": 3.45, "learning_rate": 3.641401333709026e-05, "loss": 95.9157, "step": 4076, "task_loss": 2.1633448600769043 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8607668995328062, "compression/movement_sparsity/importance_threshold": -0.0009751517137817035, "compression/movement_sparsity/linear_layer_sparsity": 0.8381572553504217, "compression/movement_sparsity/model_sparsity": 0.809363975155823, "compression_loss": 91.78778839111328, "distillation_loss": 5.533056259155273, "epoch": 3.45, "learning_rate": 3.640931717854795e-05, "loss": 95.9745, "step": 4077, "task_loss": 3.6567044258117676 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8609936634954896, "compression/movement_sparsity/importance_threshold": -0.00097356351912043, "compression/movement_sparsity/linear_layer_sparsity": 0.8382955041499919, "compression/movement_sparsity/model_sparsity": 0.8094974746838249, "compression_loss": 91.81153869628906, "distillation_loss": 3.513753890991211, "epoch": 3.45, "learning_rate": 3.6404621020005635e-05, "loss": 95.7425, "step": 4078, "task_loss": 1.9147870540618896 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8612201811095145, "compression/movement_sparsity/importance_threshold": -0.0009719770498198329, "compression/movement_sparsity/linear_layer_sparsity": 0.8384048606913804, "compression/movement_sparsity/model_sparsity": 0.8096030744915967, "compression_loss": 91.83518981933594, "distillation_loss": 3.5299763679504395, "epoch": 3.45, "learning_rate": 3.639992486146332e-05, "loss": 95.3543, "step": 4079, "task_loss": 1.5012915134429932 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8614464525087656, "compression/movement_sparsity/importance_threshold": -0.0009703923049422195, "compression/movement_sparsity/linear_layer_sparsity": 0.8386323380373698, "compression/movement_sparsity/model_sparsity": 0.8098227372909491, "compression_loss": 91.8587646484375, "distillation_loss": 3.9649181365966797, "epoch": 3.45, "learning_rate": 3.6395228702921015e-05, "loss": 96.1386, "step": 4080, "task_loss": 2.0053443908691406 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8616724778271284, "compression/movement_sparsity/importance_threshold": -0.0009688092835498929, "compression/movement_sparsity/linear_layer_sparsity": 0.8387931592862746, "compression/movement_sparsity/model_sparsity": 0.8099780338352102, "compression_loss": 91.88239288330078, "distillation_loss": 4.441703796386719, "epoch": 3.45, "learning_rate": 3.63905325443787e-05, "loss": 96.5573, "step": 4081, "task_loss": 3.1673943996429443 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8618982571984878, "compression/movement_sparsity/importance_threshold": -0.0009672279847051579, "compression/movement_sparsity/linear_layer_sparsity": 0.8390496004354515, "compression/movement_sparsity/model_sparsity": 0.8102256654420075, "compression_loss": 91.90599060058594, "distillation_loss": 5.3949761390686035, "epoch": 3.45, "learning_rate": 3.638583638583639e-05, "loss": 96.3088, "step": 4082, "task_loss": 4.461790561676025 }, { "compression/movement_sparsity/importance_regularization_factor": 0.862123790756729, "compression/movement_sparsity/importance_threshold": -0.0009656484074703183, "compression/movement_sparsity/linear_layer_sparsity": 0.8392365475356466, "compression/movement_sparsity/model_sparsity": 0.8104061903341944, "compression_loss": 91.92950439453125, "distillation_loss": 3.968522071838379, "epoch": 3.45, "learning_rate": 3.6381140227294074e-05, "loss": 95.3029, "step": 4083, "task_loss": 1.5124552249908447 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8623490786357372, "compression/movement_sparsity/importance_threshold": -0.0009640705509076763, "compression/movement_sparsity/linear_layer_sparsity": 0.8393858619627829, "compression/movement_sparsity/model_sparsity": 0.8105503753514136, "compression_loss": 91.95293426513672, "distillation_loss": 3.84421706199646, "epoch": 3.45, "learning_rate": 3.637644406875177e-05, "loss": 95.8285, "step": 4084, "task_loss": 0.8956551551818848 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8625741209693975, "compression/movement_sparsity/importance_threshold": -0.0009624944140795384, "compression/movement_sparsity/linear_layer_sparsity": 0.8394532692824284, "compression/movement_sparsity/model_sparsity": 0.8106154670222601, "compression_loss": 91.9764175415039, "distillation_loss": 5.489009380340576, "epoch": 3.45, "learning_rate": 3.637174791020945e-05, "loss": 96.0301, "step": 4085, "task_loss": 2.8080403804779053 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8627989178915951, "compression/movement_sparsity/importance_threshold": -0.000960919996048205, "compression/movement_sparsity/linear_layer_sparsity": 0.8396178227958033, "compression/movement_sparsity/model_sparsity": 0.8107743676162248, "compression_loss": 91.99991607666016, "distillation_loss": 3.9707391262054443, "epoch": 3.45, "learning_rate": 3.636705175166713e-05, "loss": 96.3037, "step": 4086, "task_loss": 2.288635730743408 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8630234695362149, "compression/movement_sparsity/importance_threshold": -0.0009593472958759853, "compression/movement_sparsity/linear_layer_sparsity": 0.8397292779906957, "compression/movement_sparsity/model_sparsity": 0.8108819939822963, "compression_loss": 92.02336120605469, "distillation_loss": 5.043789386749268, "epoch": 3.45, "learning_rate": 3.6362355593124826e-05, "loss": 96.7005, "step": 4087, "task_loss": 2.462646245956421 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8632477760371424, "compression/movement_sparsity/importance_threshold": -0.0009577763126251788, "compression/movement_sparsity/linear_layer_sparsity": 0.8399765733032959, "compression/movement_sparsity/model_sparsity": 0.8111207939401394, "compression_loss": 92.04676055908203, "distillation_loss": 5.533036231994629, "epoch": 3.46, "learning_rate": 3.635765943458251e-05, "loss": 96.5232, "step": 4088, "task_loss": 3.296221971511841 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8634718375282625, "compression/movement_sparsity/importance_threshold": -0.0009562070453580911, "compression/movement_sparsity/linear_layer_sparsity": 0.8401702217857023, "compression/movement_sparsity/model_sparsity": 0.8113077900014427, "compression_loss": 92.07014465332031, "distillation_loss": 5.672863960266113, "epoch": 3.46, "learning_rate": 3.6352963276040205e-05, "loss": 96.7002, "step": 4089, "task_loss": 3.5039267539978027 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8636956541434606, "compression/movement_sparsity/importance_threshold": -0.0009546394931370253, "compression/movement_sparsity/linear_layer_sparsity": 0.8403799798185848, "compression/movement_sparsity/model_sparsity": 0.8115103422006045, "compression_loss": 92.093505859375, "distillation_loss": 3.9402217864990234, "epoch": 3.46, "learning_rate": 3.6348267117497885e-05, "loss": 96.8754, "step": 4090, "task_loss": 2.8740739822387695 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8639192260166213, "compression/movement_sparsity/importance_threshold": -0.0009530736550242887, "compression/movement_sparsity/linear_layer_sparsity": 0.8405805681665552, "compression/movement_sparsity/model_sparsity": 0.8117040397217402, "compression_loss": 92.11685943603516, "distillation_loss": 5.592130661010742, "epoch": 3.46, "learning_rate": 3.634357095895558e-05, "loss": 96.7769, "step": 4091, "task_loss": 2.790256977081299 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8641425532816303, "compression/movement_sparsity/importance_threshold": -0.000951509530082181, "compression/movement_sparsity/linear_layer_sparsity": 0.8408162493398781, "compression/movement_sparsity/model_sparsity": 0.8119316245217193, "compression_loss": 92.14022827148438, "distillation_loss": 3.55947208404541, "epoch": 3.46, "learning_rate": 3.6338874800413264e-05, "loss": 96.5903, "step": 4092, "task_loss": 1.6302986145019531 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8643656360723726, "compression/movement_sparsity/importance_threshold": -0.0009499471173730076, "compression/movement_sparsity/linear_layer_sparsity": 0.8410552931284743, "compression/movement_sparsity/model_sparsity": 0.8121624564207924, "compression_loss": 92.16349792480469, "distillation_loss": 5.414210319519043, "epoch": 3.46, "learning_rate": 3.633417864187095e-05, "loss": 96.9089, "step": 4093, "task_loss": 2.384058713912964 }, { "compression/movement_sparsity/importance_regularization_factor": 0.864588474522733, "compression/movement_sparsity/importance_threshold": -0.0009483864159590743, "compression/movement_sparsity/linear_layer_sparsity": 0.8412111300753073, "compression/movement_sparsity/model_sparsity": 0.8123129398890913, "compression_loss": 92.18675231933594, "distillation_loss": 4.2238006591796875, "epoch": 3.46, "learning_rate": 3.632948248332864e-05, "loss": 96.3399, "step": 4094, "task_loss": 1.5896131992340088 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8648110687665972, "compression/movement_sparsity/importance_threshold": -0.0009468274249026824, "compression/movement_sparsity/linear_layer_sparsity": 0.841260686916002, "compression/movement_sparsity/model_sparsity": 0.8123607942998534, "compression_loss": 92.21001434326172, "distillation_loss": 3.8465700149536133, "epoch": 3.46, "learning_rate": 3.6324786324786323e-05, "loss": 96.8738, "step": 4095, "task_loss": 3.6973462104797363 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8650334189378499, "compression/movement_sparsity/importance_threshold": -0.0009452701432661383, "compression/movement_sparsity/linear_layer_sparsity": 0.8413490092256808, "compression/movement_sparsity/model_sparsity": 0.8124460824664835, "compression_loss": 92.23323059082031, "distillation_loss": 3.7955331802368164, "epoch": 3.46, "learning_rate": 3.6320090166244017e-05, "loss": 96.2236, "step": 4096, "task_loss": 1.742809534072876 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8652555251703765, "compression/movement_sparsity/importance_threshold": -0.0009437145701117434, "compression/movement_sparsity/linear_layer_sparsity": 0.8414236068184108, "compression/movement_sparsity/model_sparsity": 0.8125181174024142, "compression_loss": 92.2564926147461, "distillation_loss": 3.4154999256134033, "epoch": 3.46, "learning_rate": 3.63153940077017e-05, "loss": 96.3124, "step": 4097, "task_loss": 2.207719564437866 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8654773875980619, "compression/movement_sparsity/importance_threshold": -0.0009421607045018042, "compression/movement_sparsity/linear_layer_sparsity": 0.8416875124965277, "compression/movement_sparsity/model_sparsity": 0.812772957108619, "compression_loss": 92.27965545654297, "distillation_loss": 4.931546211242676, "epoch": 3.46, "learning_rate": 3.631069784915939e-05, "loss": 96.589, "step": 4098, "task_loss": 2.900505781173706 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8656990063547915, "compression/movement_sparsity/importance_threshold": -0.0009406085454986227, "compression/movement_sparsity/linear_layer_sparsity": 0.8417996712173106, "compression/movement_sparsity/model_sparsity": 0.8128812628323024, "compression_loss": 92.30279541015625, "distillation_loss": 6.492554187774658, "epoch": 3.46, "learning_rate": 3.6306001690617076e-05, "loss": 96.9467, "step": 4099, "task_loss": 2.6742470264434814 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8659203815744502, "compression/movement_sparsity/importance_threshold": -0.0009390580921645048, "compression/movement_sparsity/linear_layer_sparsity": 0.8419991386935233, "compression/movement_sparsity/model_sparsity": 0.8130738779870735, "compression_loss": 92.32596588134766, "distillation_loss": 3.8657307624816895, "epoch": 3.47, "learning_rate": 3.630130553207476e-05, "loss": 96.578, "step": 4100, "task_loss": 1.6040565967559814 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8661415133909232, "compression/movement_sparsity/importance_threshold": -0.0009375093435617533, "compression/movement_sparsity/linear_layer_sparsity": 0.8421679014380737, "compression/movement_sparsity/model_sparsity": 0.8132368432121737, "compression_loss": 92.34902954101562, "distillation_loss": 3.511505126953125, "epoch": 3.47, "learning_rate": 3.6296609373532455e-05, "loss": 95.8788, "step": 4101, "task_loss": 2.006782054901123 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8663624019380959, "compression/movement_sparsity/importance_threshold": -0.0009359622987526704, "compression/movement_sparsity/linear_layer_sparsity": 0.8423449037824604, "compression/movement_sparsity/model_sparsity": 0.8134077649815078, "compression_loss": 92.3720703125, "distillation_loss": 4.069624900817871, "epoch": 3.47, "learning_rate": 3.629191321499014e-05, "loss": 96.4233, "step": 4102, "task_loss": 1.6975586414337158 }, { "compression/movement_sparsity/importance_regularization_factor": 0.866583047349853, "compression/movement_sparsity/importance_threshold": -0.0009344169567995645, "compression/movement_sparsity/linear_layer_sparsity": 0.8425303961342039, "compression/movement_sparsity/model_sparsity": 0.8135868851003277, "compression_loss": 92.39503479003906, "distillation_loss": 3.04959774017334, "epoch": 3.47, "learning_rate": 3.628721705644783e-05, "loss": 95.9314, "step": 4103, "task_loss": 1.9361587762832642 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8668034497600802, "compression/movement_sparsity/importance_threshold": -0.0009328733167647341, "compression/movement_sparsity/linear_layer_sparsity": 0.8426187303680504, "compression/movement_sparsity/model_sparsity": 0.8136721847814936, "compression_loss": 92.41804504394531, "distillation_loss": 5.094153881072998, "epoch": 3.47, "learning_rate": 3.6282520897905514e-05, "loss": 96.6316, "step": 4104, "task_loss": 3.217383861541748 }, { "compression/movement_sparsity/importance_regularization_factor": 0.867023609302662, "compression/movement_sparsity/importance_threshold": -0.0009313313777104893, "compression/movement_sparsity/linear_layer_sparsity": 0.8428014920854052, "compression/movement_sparsity/model_sparsity": 0.8138486680716166, "compression_loss": 92.4410171508789, "distillation_loss": 3.4006552696228027, "epoch": 3.47, "learning_rate": 3.62778247393632e-05, "loss": 96.2341, "step": 4105, "task_loss": 1.98316490650177 }, { "compression/movement_sparsity/importance_regularization_factor": 0.867243526111484, "compression/movement_sparsity/importance_threshold": -0.0009297911386991287, "compression/movement_sparsity/linear_layer_sparsity": 0.8429851123428299, "compression/movement_sparsity/model_sparsity": 0.8140259804083168, "compression_loss": 92.46395874023438, "distillation_loss": 6.713625431060791, "epoch": 3.47, "learning_rate": 3.6273128580820893e-05, "loss": 96.9213, "step": 4106, "task_loss": 3.0951333045959473 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8674632003204312, "compression/movement_sparsity/importance_threshold": -0.0009282525987929589, "compression/movement_sparsity/linear_layer_sparsity": 0.8431407704271485, "compression/movement_sparsity/model_sparsity": 0.8141762911585787, "compression_loss": 92.48689270019531, "distillation_loss": 3.7216808795928955, "epoch": 3.47, "learning_rate": 3.626843242227857e-05, "loss": 96.6517, "step": 4107, "task_loss": 2.667104482650757 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8676826320633888, "compression/movement_sparsity/importance_threshold": -0.0009267157570542837, "compression/movement_sparsity/linear_layer_sparsity": 0.8432869368740289, "compression/movement_sparsity/model_sparsity": 0.8143174363383482, "compression_loss": 92.50981140136719, "distillation_loss": 4.892443656921387, "epoch": 3.47, "learning_rate": 3.6263736263736266e-05, "loss": 96.9249, "step": 4108, "task_loss": 2.320315361022949 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8679018214742417, "compression/movement_sparsity/importance_threshold": -0.0009251806125454079, "compression/movement_sparsity/linear_layer_sparsity": 0.8434904347189025, "compression/movement_sparsity/model_sparsity": 0.8145139434062177, "compression_loss": 92.53267669677734, "distillation_loss": 5.138246536254883, "epoch": 3.47, "learning_rate": 3.625904010519395e-05, "loss": 96.3104, "step": 4109, "task_loss": 2.6896862983703613 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8681207686868754, "compression/movement_sparsity/importance_threshold": -0.0009236471643286328, "compression/movement_sparsity/linear_layer_sparsity": 0.8436679855750006, "compression/movement_sparsity/model_sparsity": 0.8146853948441984, "compression_loss": 92.55548858642578, "distillation_loss": 5.047441482543945, "epoch": 3.47, "learning_rate": 3.625434394665164e-05, "loss": 96.2501, "step": 4110, "task_loss": 2.9783132076263428 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8683394738351747, "compression/movement_sparsity/importance_threshold": -0.000922115411466265, "compression/movement_sparsity/linear_layer_sparsity": 0.8438888807804549, "compression/movement_sparsity/model_sparsity": 0.8148987016197924, "compression_loss": 92.57828521728516, "distillation_loss": 2.9110918045043945, "epoch": 3.47, "learning_rate": 3.624964778810933e-05, "loss": 96.7583, "step": 4111, "task_loss": 1.374081015586853 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8685579370530251, "compression/movement_sparsity/importance_threshold": -0.0009205853530206065, "compression/movement_sparsity/linear_layer_sparsity": 0.8440674332666344, "compression/movement_sparsity/model_sparsity": 0.8150711202787798, "compression_loss": 92.60106658935547, "distillation_loss": 3.0939955711364746, "epoch": 3.48, "learning_rate": 3.624495162956701e-05, "loss": 96.3629, "step": 4112, "task_loss": 1.7925692796707153 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8687761584743113, "compression/movement_sparsity/importance_threshold": -0.000919056988053963, "compression/movement_sparsity/linear_layer_sparsity": 0.8442820205874093, "compression/movement_sparsity/model_sparsity": 0.8152783358649384, "compression_loss": 92.62384796142578, "distillation_loss": 3.8929269313812256, "epoch": 3.48, "learning_rate": 3.6240255471024705e-05, "loss": 96.7755, "step": 4113, "task_loss": 2.8315958976745605 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8689941382329187, "compression/movement_sparsity/importance_threshold": -0.0009175303156286384, "compression/movement_sparsity/linear_layer_sparsity": 0.8443706648496143, "compression/movement_sparsity/model_sparsity": 0.815363934924035, "compression_loss": 92.64655303955078, "distillation_loss": 4.712027549743652, "epoch": 3.48, "learning_rate": 3.623555931248239e-05, "loss": 96.8788, "step": 4114, "task_loss": 2.8611810207366943 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8692118764627327, "compression/movement_sparsity/importance_threshold": -0.000916005334806934, "compression/movement_sparsity/linear_layer_sparsity": 0.844465998569863, "compression/movement_sparsity/model_sparsity": 0.8154559936377124, "compression_loss": 92.66927337646484, "distillation_loss": 4.937551021575928, "epoch": 3.48, "learning_rate": 3.6230863153940084e-05, "loss": 97.9486, "step": 4115, "task_loss": 2.6333107948303223 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8694293732976379, "compression/movement_sparsity/importance_threshold": -0.0009144820446511572, "compression/movement_sparsity/linear_layer_sparsity": 0.844653208001746, "compression/movement_sparsity/model_sparsity": 0.8156367718496866, "compression_loss": 92.6919937133789, "distillation_loss": 3.503652572631836, "epoch": 3.48, "learning_rate": 3.6226166995397764e-05, "loss": 96.9067, "step": 4116, "task_loss": 2.071707248687744 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8696466288715199, "compression/movement_sparsity/importance_threshold": -0.0009129604442236101, "compression/movement_sparsity/linear_layer_sparsity": 0.8447835391540061, "compression/movement_sparsity/model_sparsity": 0.815762625725921, "compression_loss": 92.71459197998047, "distillation_loss": 4.400373935699463, "epoch": 3.48, "learning_rate": 3.622147083685546e-05, "loss": 97.4509, "step": 4117, "task_loss": 2.686494827270508 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8698636433182634, "compression/movement_sparsity/importance_threshold": -0.0009114405325865984, "compression/movement_sparsity/linear_layer_sparsity": 0.8449163863056373, "compression/movement_sparsity/model_sparsity": 0.8158909091692079, "compression_loss": 92.73721313476562, "distillation_loss": 5.112481117248535, "epoch": 3.48, "learning_rate": 3.621677467831314e-05, "loss": 96.7751, "step": 4118, "task_loss": 2.508967638015747 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8700804167717541, "compression/movement_sparsity/importance_threshold": -0.0009099223088024225, "compression/movement_sparsity/linear_layer_sparsity": 0.8451095697455059, "compression/movement_sparsity/model_sparsity": 0.8160774561636153, "compression_loss": 92.75982666015625, "distillation_loss": 3.9895613193511963, "epoch": 3.48, "learning_rate": 3.621207851977083e-05, "loss": 96.9279, "step": 4119, "task_loss": 1.7788417339324951 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8702969493658765, "compression/movement_sparsity/importance_threshold": -0.0009084057719333906, "compression/movement_sparsity/linear_layer_sparsity": 0.8452769850591133, "compression/movement_sparsity/model_sparsity": 0.8162391202461706, "compression_loss": 92.78246307373047, "distillation_loss": 4.171713352203369, "epoch": 3.48, "learning_rate": 3.6207382361228516e-05, "loss": 96.4916, "step": 4120, "task_loss": 1.8571760654449463 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8705132412345163, "compression/movement_sparsity/importance_threshold": -0.0009068909210418041, "compression/movement_sparsity/linear_layer_sparsity": 0.8454021411226194, "compression/movement_sparsity/model_sparsity": 0.8163599768138702, "compression_loss": 92.80498504638672, "distillation_loss": 3.7995357513427734, "epoch": 3.48, "learning_rate": 3.62026862026862e-05, "loss": 96.4815, "step": 4121, "task_loss": 1.2836567163467407 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8707292925115583, "compression/movement_sparsity/importance_threshold": -0.0009053777551899685, "compression/movement_sparsity/linear_layer_sparsity": 0.8455747672974837, "compression/movement_sparsity/model_sparsity": 0.8165266727485677, "compression_loss": 92.82754516601562, "distillation_loss": 4.409570217132568, "epoch": 3.48, "learning_rate": 3.6197990044143895e-05, "loss": 97.4211, "step": 4122, "task_loss": 2.3507399559020996 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8709451033308877, "compression/movement_sparsity/importance_threshold": -0.000903866273440186, "compression/movement_sparsity/linear_layer_sparsity": 0.8457196459342595, "compression/movement_sparsity/model_sparsity": 0.8166665743584713, "compression_loss": 92.85001373291016, "distillation_loss": 5.588755130767822, "epoch": 3.48, "learning_rate": 3.619329388560158e-05, "loss": 97.0848, "step": 4123, "task_loss": 3.704510450363159 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8711606738263898, "compression/movement_sparsity/importance_threshold": -0.0009023564748547614, "compression/movement_sparsity/linear_layer_sparsity": 0.8458785950888456, "compression/movement_sparsity/model_sparsity": 0.8168200631206126, "compression_loss": 92.87252044677734, "distillation_loss": 3.340946674346924, "epoch": 3.49, "learning_rate": 3.618859772705927e-05, "loss": 96.258, "step": 4124, "task_loss": 2.989189863204956 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8713760041319496, "compression/movement_sparsity/importance_threshold": -0.0009008483584959995, "compression/movement_sparsity/linear_layer_sparsity": 0.8460552874048738, "compression/movement_sparsity/model_sparsity": 0.8169906855120161, "compression_loss": 92.89494323730469, "distillation_loss": 4.447931289672852, "epoch": 3.49, "learning_rate": 3.6183901568516954e-05, "loss": 97.5464, "step": 4125, "task_loss": 2.370572328567505 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8715910943814523, "compression/movement_sparsity/importance_threshold": -0.0008993419234262024, "compression/movement_sparsity/linear_layer_sparsity": 0.8462094549682379, "compression/movement_sparsity/model_sparsity": 0.8171395569453037, "compression_loss": 92.91735076904297, "distillation_loss": 4.193358898162842, "epoch": 3.49, "learning_rate": 3.617920540997464e-05, "loss": 97.6586, "step": 4126, "task_loss": 2.0952348709106445 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8718059447087829, "compression/movement_sparsity/importance_threshold": -0.0008978371687076766, "compression/movement_sparsity/linear_layer_sparsity": 0.8462842314234823, "compression/movement_sparsity/model_sparsity": 0.8172117645992713, "compression_loss": 92.93975830078125, "distillation_loss": 3.5347540378570557, "epoch": 3.49, "learning_rate": 3.6174509251432334e-05, "loss": 97.025, "step": 4127, "task_loss": 1.2136402130126953 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8720205552478268, "compression/movement_sparsity/importance_threshold": -0.0008963340934027234, "compression/movement_sparsity/linear_layer_sparsity": 0.8464045462749282, "compression/movement_sparsity/model_sparsity": 0.8173279462654383, "compression_loss": 92.96216583251953, "distillation_loss": 4.028942108154297, "epoch": 3.49, "learning_rate": 3.616981309289002e-05, "loss": 96.8976, "step": 4128, "task_loss": 2.8698887825012207 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8722349261324689, "compression/movement_sparsity/importance_threshold": -0.0008948326965736494, "compression/movement_sparsity/linear_layer_sparsity": 0.8465271505665601, "compression/movement_sparsity/model_sparsity": 0.8174463387224776, "compression_loss": 92.9845199584961, "distillation_loss": 4.394811630249023, "epoch": 3.49, "learning_rate": 3.6165116934347706e-05, "loss": 97.6738, "step": 4129, "task_loss": 2.5135157108306885 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8724490574965945, "compression/movement_sparsity/importance_threshold": -0.0008933329772827566, "compression/movement_sparsity/linear_layer_sparsity": 0.8465443809887939, "compression/movement_sparsity/model_sparsity": 0.8174629772267008, "compression_loss": 93.00684356689453, "distillation_loss": 4.606848239898682, "epoch": 3.49, "learning_rate": 3.616042077580539e-05, "loss": 96.7904, "step": 4130, "task_loss": 2.538403034210205 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8726629494740886, "compression/movement_sparsity/importance_threshold": -0.0008918349345923499, "compression/movement_sparsity/linear_layer_sparsity": 0.8467007425990031, "compression/movement_sparsity/model_sparsity": 0.8176139673345746, "compression_loss": 93.02912139892578, "distillation_loss": 4.754396438598633, "epoch": 3.49, "learning_rate": 3.615572461726308e-05, "loss": 96.513, "step": 4131, "task_loss": 3.5716421604156494 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8728766021988363, "compression/movement_sparsity/importance_threshold": -0.000890338567564734, "compression/movement_sparsity/linear_layer_sparsity": 0.846789160302023, "compression/movement_sparsity/model_sparsity": 0.8176993476174911, "compression_loss": 93.05137634277344, "distillation_loss": 4.951827049255371, "epoch": 3.49, "learning_rate": 3.615102845872077e-05, "loss": 97.137, "step": 4132, "task_loss": 2.5684168338775635 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8730900158047231, "compression/movement_sparsity/importance_threshold": -0.0008888438752622102, "compression/movement_sparsity/linear_layer_sparsity": 0.8470180446997934, "compression/movement_sparsity/model_sparsity": 0.8179203691320673, "compression_loss": 93.0736312866211, "distillation_loss": 4.65708589553833, "epoch": 3.49, "learning_rate": 3.614633230017845e-05, "loss": 97.591, "step": 4133, "task_loss": 2.4607903957366943 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8733031904256338, "compression/movement_sparsity/importance_threshold": -0.000887350856747085, "compression/movement_sparsity/linear_layer_sparsity": 0.847111446704885, "compression/movement_sparsity/model_sparsity": 0.818010562490946, "compression_loss": 93.0958023071289, "distillation_loss": 4.14515495300293, "epoch": 3.49, "learning_rate": 3.6141636141636145e-05, "loss": 97.2935, "step": 4134, "task_loss": 2.647430658340454 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8735161261954536, "compression/movement_sparsity/importance_threshold": -0.0008858595110816615, "compression/movement_sparsity/linear_layer_sparsity": 0.8472969271324609, "compression/movement_sparsity/model_sparsity": 0.8181896710952301, "compression_loss": 93.11800384521484, "distillation_loss": 4.334054470062256, "epoch": 3.5, "learning_rate": 3.613693998309383e-05, "loss": 97.3124, "step": 4135, "task_loss": 2.912120819091797 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8737288232480677, "compression/movement_sparsity/importance_threshold": -0.0008843698373282435, "compression/movement_sparsity/linear_layer_sparsity": 0.8474132235394135, "compression/movement_sparsity/model_sparsity": 0.8183019723628342, "compression_loss": 93.14007568359375, "distillation_loss": 3.711839437484741, "epoch": 3.5, "learning_rate": 3.613224382455152e-05, "loss": 96.926, "step": 4136, "task_loss": 2.7634055614471436 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8739412817173613, "compression/movement_sparsity/importance_threshold": -0.000882881834549135, "compression/movement_sparsity/linear_layer_sparsity": 0.8474635554510045, "compression/movement_sparsity/model_sparsity": 0.818350575218423, "compression_loss": 93.16217041015625, "distillation_loss": 5.1326141357421875, "epoch": 3.5, "learning_rate": 3.6127547666009204e-05, "loss": 96.9891, "step": 4137, "task_loss": 2.757244348526001 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8741535017372193, "compression/movement_sparsity/importance_threshold": -0.0008813955018066415, "compression/movement_sparsity/linear_layer_sparsity": 0.8475131957608726, "compression/movement_sparsity/model_sparsity": 0.8183985102309357, "compression_loss": 93.1842269897461, "distillation_loss": 3.8709230422973633, "epoch": 3.5, "learning_rate": 3.612285150746689e-05, "loss": 97.1981, "step": 4138, "task_loss": 2.748084783554077 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8743654834415271, "compression/movement_sparsity/importance_threshold": -0.0008799108381630644, "compression/movement_sparsity/linear_layer_sparsity": 0.8476391984402807, "compression/movement_sparsity/model_sparsity": 0.8185201843306765, "compression_loss": 93.20624542236328, "distillation_loss": 5.369759559631348, "epoch": 3.5, "learning_rate": 3.611815534892458e-05, "loss": 98.3797, "step": 4139, "task_loss": 3.713080644607544 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8745772269641696, "compression/movement_sparsity/importance_threshold": -0.0008784278426807101, "compression/movement_sparsity/linear_layer_sparsity": 0.8477183152925447, "compression/movement_sparsity/model_sparsity": 0.8185965832756733, "compression_loss": 93.22822570800781, "distillation_loss": 5.3766350746154785, "epoch": 3.5, "learning_rate": 3.611345919038227e-05, "loss": 98.1595, "step": 4140, "task_loss": 2.455111026763916 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8747887324390324, "compression/movement_sparsity/importance_threshold": -0.0008769465144218792, "compression/movement_sparsity/linear_layer_sparsity": 0.8478586627456188, "compression/movement_sparsity/model_sparsity": 0.818732109361975, "compression_loss": 93.25021362304688, "distillation_loss": 3.059952974319458, "epoch": 3.5, "learning_rate": 3.6108763031839956e-05, "loss": 97.2107, "step": 4141, "task_loss": 3.122684955596924 }, { "compression/movement_sparsity/importance_regularization_factor": 0.875, "compression/movement_sparsity/importance_threshold": -0.0008754668524488807, "compression/movement_sparsity/linear_layer_sparsity": 0.8480690289110506, "compression/movement_sparsity/model_sparsity": 0.8189352488024624, "compression_loss": 93.27220153808594, "distillation_loss": 5.326725959777832, "epoch": 3.5, "learning_rate": 3.610406687329764e-05, "loss": 97.7119, "step": 4142, "task_loss": 3.111431121826172 }, { "compression/movement_sparsity/importance_regularization_factor": 0.875211029780958, "compression/movement_sparsity/importance_threshold": -0.0008739888558240141, "compression/movement_sparsity/linear_layer_sparsity": 0.848217687508967, "compression/movement_sparsity/model_sparsity": 0.8190788005202129, "compression_loss": 93.29415893554688, "distillation_loss": 4.338418483734131, "epoch": 3.5, "learning_rate": 3.609937071475533e-05, "loss": 97.7256, "step": 4143, "task_loss": 1.840023398399353 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8754218219157913, "compression/movement_sparsity/importance_threshold": -0.000872512523609586, "compression/movement_sparsity/linear_layer_sparsity": 0.8482878254630011, "compression/movement_sparsity/model_sparsity": 0.8191465290197564, "compression_loss": 93.31608581542969, "distillation_loss": 5.189243316650391, "epoch": 3.5, "learning_rate": 3.609467455621302e-05, "loss": 97.6818, "step": 4144, "task_loss": 2.3773601055145264 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8756323765383853, "compression/movement_sparsity/importance_threshold": -0.0008710378548678985, "compression/movement_sparsity/linear_layer_sparsity": 0.8484066497934925, "compression/movement_sparsity/model_sparsity": 0.819261271368949, "compression_loss": 93.3379898071289, "distillation_loss": 3.574031114578247, "epoch": 3.5, "learning_rate": 3.608997839767071e-05, "loss": 97.1067, "step": 4145, "task_loss": 1.1576638221740723 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8758426937826249, "compression/movement_sparsity/importance_threshold": -0.0008695648486612573, "compression/movement_sparsity/linear_layer_sparsity": 0.8484710045262233, "compression/movement_sparsity/model_sparsity": 0.8193234153186322, "compression_loss": 93.35987854003906, "distillation_loss": 4.426519870758057, "epoch": 3.5, "learning_rate": 3.6085282239128394e-05, "loss": 97.8168, "step": 4146, "task_loss": 2.5952112674713135 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8760527737823953, "compression/movement_sparsity/importance_threshold": -0.0008680935040519663, "compression/movement_sparsity/linear_layer_sparsity": 0.8486858899511891, "compression/movement_sparsity/model_sparsity": 0.8195309187681855, "compression_loss": 93.3817138671875, "distillation_loss": 4.929561614990234, "epoch": 3.51, "learning_rate": 3.608058608058608e-05, "loss": 97.4987, "step": 4147, "task_loss": 3.311614513397217 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8762626166715816, "compression/movement_sparsity/importance_threshold": -0.0008666238201023285, "compression/movement_sparsity/linear_layer_sparsity": 0.84888980514193, "compression/movement_sparsity/model_sparsity": 0.819727828844808, "compression_loss": 93.40352630615234, "distillation_loss": 3.5207104682922363, "epoch": 3.51, "learning_rate": 3.6075889922043774e-05, "loss": 96.882, "step": 4148, "task_loss": 2.434013843536377 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8764722225840692, "compression/movement_sparsity/importance_threshold": -0.0008651557958746468, "compression/movement_sparsity/linear_layer_sparsity": 0.8490061253972179, "compression/movement_sparsity/model_sparsity": 0.8198401531414837, "compression_loss": 93.42529296875, "distillation_loss": 3.7370734214782715, "epoch": 3.51, "learning_rate": 3.607119376350146e-05, "loss": 97.4195, "step": 4149, "task_loss": 1.7413933277130127 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8766815916537428, "compression/movement_sparsity/importance_threshold": -0.0008636894304312288, "compression/movement_sparsity/linear_layer_sparsity": 0.8492498673078626, "compression/movement_sparsity/model_sparsity": 0.82007552176766, "compression_loss": 93.447021484375, "distillation_loss": 4.307043075561523, "epoch": 3.51, "learning_rate": 3.606649760495914e-05, "loss": 97.6733, "step": 4150, "task_loss": 3.040567636489868 }, { "compression/movement_sparsity/importance_regularization_factor": 0.876890724014488, "compression/movement_sparsity/importance_threshold": -0.0008622247228343739, "compression/movement_sparsity/linear_layer_sparsity": 0.8494278474339955, "compression/movement_sparsity/model_sparsity": 0.8202473877289292, "compression_loss": 93.46873474121094, "distillation_loss": 4.055239677429199, "epoch": 3.51, "learning_rate": 3.606180144641683e-05, "loss": 98.0497, "step": 4151, "task_loss": 2.5396182537078857 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8770996198001896, "compression/movement_sparsity/importance_threshold": -0.0008607616721463904, "compression/movement_sparsity/linear_layer_sparsity": 0.8496192541728863, "compression/movement_sparsity/model_sparsity": 0.8204322190575032, "compression_loss": 93.49040985107422, "distillation_loss": 3.8153319358825684, "epoch": 3.51, "learning_rate": 3.605710528787452e-05, "loss": 97.5929, "step": 4152, "task_loss": 1.693247675895691 }, { "compression/movement_sparsity/importance_regularization_factor": 0.877308279144733, "compression/movement_sparsity/importance_threshold": -0.0008593002774295788, "compression/movement_sparsity/linear_layer_sparsity": 0.8497208361569762, "compression/movement_sparsity/model_sparsity": 0.820530311387937, "compression_loss": 93.51212310791016, "distillation_loss": 3.8544321060180664, "epoch": 3.51, "learning_rate": 3.605240912933221e-05, "loss": 97.5761, "step": 4153, "task_loss": 2.557344436645508 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8775167021820031, "compression/movement_sparsity/importance_threshold": -0.0008578405377462454, "compression/movement_sparsity/linear_layer_sparsity": 0.849757813000815, "compression/movement_sparsity/model_sparsity": 0.8205660179634358, "compression_loss": 93.53378295898438, "distillation_loss": 4.353232383728027, "epoch": 3.51, "learning_rate": 3.604771297078989e-05, "loss": 97.8664, "step": 4154, "task_loss": 2.2120399475097656 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8777248890458851, "compression/movement_sparsity/importance_threshold": -0.0008563824521586934, "compression/movement_sparsity/linear_layer_sparsity": 0.849799893388402, "compression/movement_sparsity/model_sparsity": 0.8206066527602547, "compression_loss": 93.55540466308594, "distillation_loss": 4.8854475021362305, "epoch": 3.51, "learning_rate": 3.6043016812247585e-05, "loss": 98.12, "step": 4155, "task_loss": 2.84133243560791 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8779328398702643, "compression/movement_sparsity/importance_threshold": -0.0008549260197292258, "compression/movement_sparsity/linear_layer_sparsity": 0.8500345133108053, "compression/movement_sparsity/model_sparsity": 0.8208332127665481, "compression_loss": 93.57708740234375, "distillation_loss": 3.9159774780273438, "epoch": 3.51, "learning_rate": 3.603832065370527e-05, "loss": 98.1309, "step": 4156, "task_loss": 1.5043833255767822 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8781405547890256, "compression/movement_sparsity/importance_threshold": -0.0008534712395201483, "compression/movement_sparsity/linear_layer_sparsity": 0.8502012727951928, "compression/movement_sparsity/model_sparsity": 0.8209942435496348, "compression_loss": 93.5986099243164, "distillation_loss": 4.348298072814941, "epoch": 3.51, "learning_rate": 3.603362449516296e-05, "loss": 97.9904, "step": 4157, "task_loss": 1.9731087684631348 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8783480339360544, "compression/movement_sparsity/importance_threshold": -0.0008520181105937637, "compression/movement_sparsity/linear_layer_sparsity": 0.8503316993407939, "compression/movement_sparsity/model_sparsity": 0.8211201895421555, "compression_loss": 93.62019348144531, "distillation_loss": 4.849423408508301, "epoch": 3.51, "learning_rate": 3.602892833662065e-05, "loss": 98.4102, "step": 4158, "task_loss": 3.974299192428589 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8785552774452356, "compression/movement_sparsity/importance_threshold": -0.000850566632012377, "compression/movement_sparsity/linear_layer_sparsity": 0.8504595025695151, "compression/movement_sparsity/model_sparsity": 0.8212436023368014, "compression_loss": 93.64170837402344, "distillation_loss": 5.258194923400879, "epoch": 3.52, "learning_rate": 3.602423217807833e-05, "loss": 97.5949, "step": 4159, "task_loss": 2.7566659450531006 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8787622854504545, "compression/movement_sparsity/importance_threshold": -0.0008491168028382911, "compression/movement_sparsity/linear_layer_sparsity": 0.850643504400304, "compression/movement_sparsity/model_sparsity": 0.8214212831386469, "compression_loss": 93.66324615478516, "distillation_loss": 6.0293073654174805, "epoch": 3.52, "learning_rate": 3.6019536019536024e-05, "loss": 98.5599, "step": 4160, "task_loss": 2.3827083110809326 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8789690580855962, "compression/movement_sparsity/importance_threshold": -0.0008476686221338108, "compression/movement_sparsity/linear_layer_sparsity": 0.8507477097012739, "compression/movement_sparsity/model_sparsity": 0.8215219086669554, "compression_loss": 93.68476104736328, "distillation_loss": 4.878064155578613, "epoch": 3.52, "learning_rate": 3.601483986099371e-05, "loss": 97.8712, "step": 4161, "task_loss": 2.801372766494751 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8791755954845459, "compression/movement_sparsity/importance_threshold": -0.0008462220889612392, "compression/movement_sparsity/linear_layer_sparsity": 0.8508898696278286, "compression/movement_sparsity/model_sparsity": 0.821659184962698, "compression_loss": 93.70625305175781, "distillation_loss": 5.138028144836426, "epoch": 3.52, "learning_rate": 3.6010143702451396e-05, "loss": 98.4596, "step": 4162, "task_loss": 3.43042254447937 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8793818977811886, "compression/movement_sparsity/importance_threshold": -0.0008447772023828818, "compression/movement_sparsity/linear_layer_sparsity": 0.8510083481574585, "compression/movement_sparsity/model_sparsity": 0.8217735933903525, "compression_loss": 93.72772979736328, "distillation_loss": 3.8158538341522217, "epoch": 3.52, "learning_rate": 3.600544754390908e-05, "loss": 98.5376, "step": 4163, "task_loss": 1.2900984287261963 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8795879651094095, "compression/movement_sparsity/importance_threshold": -0.00084333396146104, "compression/movement_sparsity/linear_layer_sparsity": 0.8512126210732285, "compression/movement_sparsity/model_sparsity": 0.8219708489030487, "compression_loss": 93.7491226196289, "distillation_loss": 5.257659435272217, "epoch": 3.52, "learning_rate": 3.600075138536677e-05, "loss": 98.4433, "step": 4164, "task_loss": 2.4934074878692627 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8797937976030938, "compression/movement_sparsity/importance_threshold": -0.0008418923652580203, "compression/movement_sparsity/linear_layer_sparsity": 0.8513078236276332, "compression/movement_sparsity/model_sparsity": 0.8220627809568324, "compression_loss": 93.77058410644531, "distillation_loss": 5.180685043334961, "epoch": 3.52, "learning_rate": 3.599605522682446e-05, "loss": 97.656, "step": 4165, "task_loss": 3.353649854660034 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8799993953961265, "compression/movement_sparsity/importance_threshold": -0.0008404524128361266, "compression/movement_sparsity/linear_layer_sparsity": 0.8514576865664808, "compression/movement_sparsity/model_sparsity": 0.8222074956426981, "compression_loss": 93.79193878173828, "distillation_loss": 3.1469130516052246, "epoch": 3.52, "learning_rate": 3.599135906828215e-05, "loss": 98.4429, "step": 4166, "task_loss": 1.9907824993133545 }, { "compression/movement_sparsity/importance_regularization_factor": 0.880204758622393, "compression/movement_sparsity/importance_threshold": -0.0008390141032576601, "compression/movement_sparsity/linear_layer_sparsity": 0.8516011700756431, "compression/movement_sparsity/model_sparsity": 0.8223460500519139, "compression_loss": 93.81331634521484, "distillation_loss": 4.858263969421387, "epoch": 3.52, "learning_rate": 3.5986662909739835e-05, "loss": 98.811, "step": 4167, "task_loss": 3.7204430103302 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8804098874157782, "compression/movement_sparsity/importance_threshold": -0.0008375774355849274, "compression/movement_sparsity/linear_layer_sparsity": 0.8518151254155334, "compression/movement_sparsity/model_sparsity": 0.8225526553676753, "compression_loss": 93.8345947265625, "distillation_loss": 4.608565330505371, "epoch": 3.52, "learning_rate": 3.598196675119752e-05, "loss": 98.5426, "step": 4168, "task_loss": 2.3603477478027344 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8806147819101673, "compression/movement_sparsity/importance_threshold": -0.0008361424088802324, "compression/movement_sparsity/linear_layer_sparsity": 0.8519441210610181, "compression/movement_sparsity/model_sparsity": 0.8226772196159007, "compression_loss": 93.85592651367188, "distillation_loss": 5.692918300628662, "epoch": 3.52, "learning_rate": 3.597727059265521e-05, "loss": 98.8037, "step": 4169, "task_loss": 2.694190263748169 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8808194422394454, "compression/movement_sparsity/importance_threshold": -0.0008347090222058781, "compression/movement_sparsity/linear_layer_sparsity": 0.8519806567106545, "compression/movement_sparsity/model_sparsity": 0.8227125001535751, "compression_loss": 93.877197265625, "distillation_loss": 5.2529296875, "epoch": 3.52, "learning_rate": 3.59725744341129e-05, "loss": 98.4049, "step": 4170, "task_loss": 2.8716211318969727 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8810238685374978, "compression/movement_sparsity/importance_threshold": -0.0008332772746241683, "compression/movement_sparsity/linear_layer_sparsity": 0.8520976447193299, "compression/movement_sparsity/model_sparsity": 0.8228254692642554, "compression_loss": 93.8984603881836, "distillation_loss": 6.019613742828369, "epoch": 3.53, "learning_rate": 3.596787827557058e-05, "loss": 98.0078, "step": 4171, "task_loss": 3.4348418712615967 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8812280609382095, "compression/movement_sparsity/importance_threshold": -0.000831847165197408, "compression/movement_sparsity/linear_layer_sparsity": 0.8522051887872377, "compression/movement_sparsity/model_sparsity": 0.8229293188625864, "compression_loss": 93.9196548461914, "distillation_loss": 4.286190986633301, "epoch": 3.53, "learning_rate": 3.596318211702827e-05, "loss": 98.1081, "step": 4172, "task_loss": 2.0071964263916016 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8814320195754657, "compression/movement_sparsity/importance_threshold": -0.0008304186929879, "compression/movement_sparsity/linear_layer_sparsity": 0.8523320023100451, "compression/movement_sparsity/model_sparsity": 0.8230517759507613, "compression_loss": 93.9407730102539, "distillation_loss": 4.196881294250488, "epoch": 3.53, "learning_rate": 3.595848595848596e-05, "loss": 98.1584, "step": 4173, "task_loss": 2.8888401985168457 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8816357445831514, "compression/movement_sparsity/importance_threshold": -0.0008289918570579501, "compression/movement_sparsity/linear_layer_sparsity": 0.8523904188072933, "compression/movement_sparsity/model_sparsity": 0.8231081856616187, "compression_loss": 93.96197509765625, "distillation_loss": 5.669647216796875, "epoch": 3.53, "learning_rate": 3.5953789799943646e-05, "loss": 97.9186, "step": 4174, "task_loss": 4.2195820808410645 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8818392360951519, "compression/movement_sparsity/importance_threshold": -0.0008275666564698612, "compression/movement_sparsity/linear_layer_sparsity": 0.8526316566427344, "compression/movement_sparsity/model_sparsity": 0.823341136235278, "compression_loss": 93.98297882080078, "distillation_loss": 4.817070960998535, "epoch": 3.53, "learning_rate": 3.594909364140134e-05, "loss": 98.4848, "step": 4175, "task_loss": 1.8361786603927612 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8820424942453525, "compression/movement_sparsity/importance_threshold": -0.0008261430902859355, "compression/movement_sparsity/linear_layer_sparsity": 0.8528830538690013, "compression/movement_sparsity/model_sparsity": 0.8235838971934344, "compression_loss": 94.0041275024414, "distillation_loss": 4.981739044189453, "epoch": 3.53, "learning_rate": 3.594439748285902e-05, "loss": 98.4853, "step": 4176, "task_loss": 2.181946039199829 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8822455191676379, "compression/movement_sparsity/importance_threshold": -0.0008247211575684804, "compression/movement_sparsity/linear_layer_sparsity": 0.8530718730635152, "compression/movement_sparsity/model_sparsity": 0.823766229867741, "compression_loss": 94.025146484375, "distillation_loss": 4.7256975173950195, "epoch": 3.53, "learning_rate": 3.593970132431671e-05, "loss": 98.6515, "step": 4177, "task_loss": 3.0115597248077393 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8824483109958936, "compression/movement_sparsity/importance_threshold": -0.0008233008573797981, "compression/movement_sparsity/linear_layer_sparsity": 0.8532088459771483, "compression/movement_sparsity/model_sparsity": 0.8238984973404129, "compression_loss": 94.0461654663086, "distillation_loss": 5.346207618713379, "epoch": 3.53, "learning_rate": 3.59350051657744e-05, "loss": 98.5219, "step": 4178, "task_loss": 2.447176933288574 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8826508698640047, "compression/movement_sparsity/importance_threshold": -0.0008218821887821916, "compression/movement_sparsity/linear_layer_sparsity": 0.853339677944449, "compression/movement_sparsity/model_sparsity": 0.8240248348271505, "compression_loss": 94.06723022460938, "distillation_loss": 5.215678691864014, "epoch": 3.53, "learning_rate": 3.593030900723209e-05, "loss": 98.2285, "step": 4179, "task_loss": 3.0503692626953125 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8828531959058562, "compression/movement_sparsity/importance_threshold": -0.0008204651508379656, "compression/movement_sparsity/linear_layer_sparsity": 0.853439554772567, "compression/movement_sparsity/model_sparsity": 0.8241212805789656, "compression_loss": 94.08818817138672, "distillation_loss": 4.184694290161133, "epoch": 3.53, "learning_rate": 3.592561284868977e-05, "loss": 98.3332, "step": 4180, "task_loss": 2.027510166168213 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8830552892553332, "compression/movement_sparsity/importance_threshold": -0.0008190497426094259, "compression/movement_sparsity/linear_layer_sparsity": 0.8535405643966104, "compression/movement_sparsity/model_sparsity": 0.8242188202116811, "compression_loss": 94.10914611816406, "distillation_loss": 3.8027215003967285, "epoch": 3.53, "learning_rate": 3.592091669014746e-05, "loss": 98.9346, "step": 4181, "task_loss": 2.356431722640991 }, { "compression/movement_sparsity/importance_regularization_factor": 0.883257150046321, "compression/movement_sparsity/importance_threshold": -0.0008176359631588744, "compression/movement_sparsity/linear_layer_sparsity": 0.8536569085002336, "compression/movement_sparsity/model_sparsity": 0.8243311675374285, "compression_loss": 94.13006591796875, "distillation_loss": 4.215858459472656, "epoch": 3.53, "learning_rate": 3.591622053160515e-05, "loss": 98.9113, "step": 4182, "task_loss": 2.6620218753814697 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8834587784127047, "compression/movement_sparsity/importance_threshold": -0.0008162238115486152, "compression/movement_sparsity/linear_layer_sparsity": 0.8537399484036496, "compression/movement_sparsity/model_sparsity": 0.8244113547647016, "compression_loss": 94.15107727050781, "distillation_loss": 5.0990400314331055, "epoch": 3.54, "learning_rate": 3.5911524373062836e-05, "loss": 98.8428, "step": 4183, "task_loss": 2.691236734390259 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8836601744883694, "compression/movement_sparsity/importance_threshold": -0.0008148132868409539, "compression/movement_sparsity/linear_layer_sparsity": 0.8538972997197726, "compression/movement_sparsity/model_sparsity": 0.8245633005790465, "compression_loss": 94.17195892333984, "distillation_loss": 4.923058986663818, "epoch": 3.54, "learning_rate": 3.590682821452052e-05, "loss": 97.9176, "step": 4184, "task_loss": 2.138990879058838 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8838613384072003, "compression/movement_sparsity/importance_threshold": -0.0008134043880981927, "compression/movement_sparsity/linear_layer_sparsity": 0.8539647189635857, "compression/movement_sparsity/model_sparsity": 0.8246284037644288, "compression_loss": 94.19287872314453, "distillation_loss": 3.6656653881073, "epoch": 3.54, "learning_rate": 3.590213205597821e-05, "loss": 97.9746, "step": 4185, "task_loss": 1.2535301446914673 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8840622703030825, "compression/movement_sparsity/importance_threshold": -0.0008119971143826362, "compression/movement_sparsity/linear_layer_sparsity": 0.8541374643801264, "compression/movement_sparsity/model_sparsity": 0.8247952148444843, "compression_loss": 94.21368408203125, "distillation_loss": 4.167150497436523, "epoch": 3.54, "learning_rate": 3.58974358974359e-05, "loss": 97.861, "step": 4186, "task_loss": 2.6228694915771484 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8842629703099012, "compression/movement_sparsity/importance_threshold": -0.0008105914647565876, "compression/movement_sparsity/linear_layer_sparsity": 0.8543069187263996, "compression/movement_sparsity/model_sparsity": 0.8249588479126605, "compression_loss": 94.23454284667969, "distillation_loss": 4.144308090209961, "epoch": 3.54, "learning_rate": 3.589273973889359e-05, "loss": 98.4715, "step": 4187, "task_loss": 2.6289780139923096 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8844634385615413, "compression/movement_sparsity/importance_threshold": -0.0008091874382823534, "compression/movement_sparsity/linear_layer_sparsity": 0.8544358905235492, "compression/movement_sparsity/model_sparsity": 0.8250833891318142, "compression_loss": 94.25535583496094, "distillation_loss": 4.427104949951172, "epoch": 3.54, "learning_rate": 3.5888043580351275e-05, "loss": 98.0066, "step": 4188, "task_loss": 3.208298444747925 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8846636751918884, "compression/movement_sparsity/importance_threshold": -0.0008077850340222339, "compression/movement_sparsity/linear_layer_sparsity": 0.8546387086908676, "compression/movement_sparsity/model_sparsity": 0.8252792398711436, "compression_loss": 94.27613067626953, "distillation_loss": 4.748185157775879, "epoch": 3.54, "learning_rate": 3.588334742180896e-05, "loss": 98.2163, "step": 4189, "task_loss": 2.598127841949463 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8848636803348271, "compression/movement_sparsity/importance_threshold": -0.0008063842510385366, "compression/movement_sparsity/linear_layer_sparsity": 0.8548470000511308, "compression/movement_sparsity/model_sparsity": 0.8254803757824026, "compression_loss": 94.29689025878906, "distillation_loss": 5.485854148864746, "epoch": 3.54, "learning_rate": 3.587865126326665e-05, "loss": 98.3996, "step": 4190, "task_loss": 1.9777425527572632 }, { "compression/movement_sparsity/importance_regularization_factor": 0.885063454124243, "compression/movement_sparsity/importance_threshold": -0.0008049850883935627, "compression/movement_sparsity/linear_layer_sparsity": 0.854957250905092, "compression/movement_sparsity/model_sparsity": 0.8255868391803589, "compression_loss": 94.317626953125, "distillation_loss": 3.7345404624938965, "epoch": 3.54, "learning_rate": 3.587395510472434e-05, "loss": 98.1652, "step": 4191, "task_loss": 2.169113874435425 }, { "compression/movement_sparsity/importance_regularization_factor": 0.885262996694021, "compression/movement_sparsity/importance_threshold": -0.0008035875451496179, "compression/movement_sparsity/linear_layer_sparsity": 0.8551452711803743, "compression/movement_sparsity/model_sparsity": 0.8257684003807673, "compression_loss": 94.33834838867188, "distillation_loss": 3.2240233421325684, "epoch": 3.54, "learning_rate": 3.586925894618203e-05, "loss": 98.0911, "step": 4192, "task_loss": 2.38122296333313 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8854623081780463, "compression/movement_sparsity/importance_threshold": -0.000802191620369006, "compression/movement_sparsity/linear_layer_sparsity": 0.8553374291418262, "compression/movement_sparsity/model_sparsity": 0.8259539571250963, "compression_loss": 94.35904693603516, "distillation_loss": 4.507116794586182, "epoch": 3.54, "learning_rate": 3.5864562787639713e-05, "loss": 98.7274, "step": 4193, "task_loss": 3.1533265113830566 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8856613887102041, "compression/movement_sparsity/importance_threshold": -0.0008007973131140293, "compression/movement_sparsity/linear_layer_sparsity": 0.8555357638221136, "compression/movement_sparsity/model_sparsity": 0.8261454783989669, "compression_loss": 94.3796615600586, "distillation_loss": 5.630313396453857, "epoch": 3.54, "learning_rate": 3.58598666290974e-05, "loss": 98.6595, "step": 4194, "task_loss": 3.042914867401123 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8858602384243793, "compression/movement_sparsity/importance_threshold": -0.0007994046224469942, "compression/movement_sparsity/linear_layer_sparsity": 0.8557258946750674, "compression/movement_sparsity/model_sparsity": 0.8263290776722109, "compression_loss": 94.4003677368164, "distillation_loss": 5.783313751220703, "epoch": 3.55, "learning_rate": 3.5855170470555086e-05, "loss": 99.3368, "step": 4195, "task_loss": 3.7847111225128174 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8860588574544573, "compression/movement_sparsity/importance_threshold": -0.0007980135474302038, "compression/movement_sparsity/linear_layer_sparsity": 0.8559378348306272, "compression/movement_sparsity/model_sparsity": 0.826533737031423, "compression_loss": 94.4209976196289, "distillation_loss": 4.880232334136963, "epoch": 3.55, "learning_rate": 3.585047431201278e-05, "loss": 98.4293, "step": 4196, "task_loss": 1.8543449640274048 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8862572459343231, "compression/movement_sparsity/importance_threshold": -0.0007966240871259619, "compression/movement_sparsity/linear_layer_sparsity": 0.8561505142845804, "compression/movement_sparsity/model_sparsity": 0.8267391102918544, "compression_loss": 94.44161224365234, "distillation_loss": 3.903545618057251, "epoch": 3.55, "learning_rate": 3.584577815347046e-05, "loss": 98.8785, "step": 4197, "task_loss": 2.3983712196350098 }, { "compression/movement_sparsity/importance_regularization_factor": 0.886455403997862, "compression/movement_sparsity/importance_threshold": -0.0007952362405965717, "compression/movement_sparsity/linear_layer_sparsity": 0.8561998684144253, "compression/movement_sparsity/model_sparsity": 0.8267867689555081, "compression_loss": 94.46214294433594, "distillation_loss": 4.0785112380981445, "epoch": 3.55, "learning_rate": 3.584108199492815e-05, "loss": 98.8763, "step": 4198, "task_loss": 2.148146867752075 }, { "compression/movement_sparsity/importance_regularization_factor": 0.886653331778959, "compression/movement_sparsity/importance_threshold": -0.0007938500069043387, "compression/movement_sparsity/linear_layer_sparsity": 0.8562818709152571, "compression/movement_sparsity/model_sparsity": 0.8268659544181671, "compression_loss": 94.48271942138672, "distillation_loss": 4.398580074310303, "epoch": 3.55, "learning_rate": 3.583638583638584e-05, "loss": 98.0878, "step": 4199, "task_loss": 2.474411964416504 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8868510294114992, "compression/movement_sparsity/importance_threshold": -0.0007924653851115659, "compression/movement_sparsity/linear_layer_sparsity": 0.8564375647720786, "compression/movement_sparsity/model_sparsity": 0.8270162997120364, "compression_loss": 94.50326538085938, "distillation_loss": 3.9863524436950684, "epoch": 3.55, "learning_rate": 3.5831689677843525e-05, "loss": 98.4826, "step": 4200, "task_loss": 2.3416385650634766 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8870484970293679, "compression/movement_sparsity/importance_threshold": -0.0007910823742805573, "compression/movement_sparsity/linear_layer_sparsity": 0.856601808257095, "compression/movement_sparsity/model_sparsity": 0.8271749009280704, "compression_loss": 94.52374267578125, "distillation_loss": 3.6895673274993896, "epoch": 3.55, "learning_rate": 3.582699351930121e-05, "loss": 98.1414, "step": 4201, "task_loss": 1.4110249280929565 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8872457347664502, "compression/movement_sparsity/importance_threshold": -0.0007897009734736168, "compression/movement_sparsity/linear_layer_sparsity": 0.8567363009438599, "compression/movement_sparsity/model_sparsity": 0.827304773377297, "compression_loss": 94.5442123413086, "distillation_loss": 4.26611852645874, "epoch": 3.55, "learning_rate": 3.58222973607589e-05, "loss": 99.3701, "step": 4202, "task_loss": 2.0326406955718994 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8874427427566312, "compression/movement_sparsity/importance_threshold": -0.0007883211817530491, "compression/movement_sparsity/linear_layer_sparsity": 0.8569686075777417, "compression/movement_sparsity/model_sparsity": 0.8275290995636463, "compression_loss": 94.56466674804688, "distillation_loss": 3.3899190425872803, "epoch": 3.55, "learning_rate": 3.581760120221659e-05, "loss": 98.6473, "step": 4203, "task_loss": 1.7740589380264282 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8876395211337959, "compression/movement_sparsity/importance_threshold": -0.0007869429981811581, "compression/movement_sparsity/linear_layer_sparsity": 0.8569487299902928, "compression/movement_sparsity/model_sparsity": 0.8275099048324768, "compression_loss": 94.58500671386719, "distillation_loss": 3.7976932525634766, "epoch": 3.55, "learning_rate": 3.581290504367428e-05, "loss": 99.2411, "step": 4204, "task_loss": 1.7195155620574951 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8878360700318296, "compression/movement_sparsity/importance_threshold": -0.000785566421820246, "compression/movement_sparsity/linear_layer_sparsity": 0.8570022456546426, "compression/movement_sparsity/model_sparsity": 0.8275615820691227, "compression_loss": 94.60545349121094, "distillation_loss": 3.3278534412384033, "epoch": 3.55, "learning_rate": 3.580820888513196e-05, "loss": 98.5774, "step": 4205, "task_loss": 1.694261074066162 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8880323895846174, "compression/movement_sparsity/importance_threshold": -0.0007841914517326201, "compression/movement_sparsity/linear_layer_sparsity": 0.857163996988623, "compression/movement_sparsity/model_sparsity": 0.8277177767471756, "compression_loss": 94.62581634521484, "distillation_loss": 6.120212554931641, "epoch": 3.56, "learning_rate": 3.580351272658965e-05, "loss": 99.4717, "step": 4206, "task_loss": 3.3743984699249268 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8882284799260447, "compression/movement_sparsity/importance_threshold": -0.0007828180869805801, "compression/movement_sparsity/linear_layer_sparsity": 0.8572585437138077, "compression/movement_sparsity/model_sparsity": 0.8278090755014905, "compression_loss": 94.6461410522461, "distillation_loss": 3.7408790588378906, "epoch": 3.56, "learning_rate": 3.5798816568047336e-05, "loss": 98.6247, "step": 4207, "task_loss": 2.323953866958618 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8884243411899961, "compression/movement_sparsity/importance_threshold": -0.0007814463266264341, "compression/movement_sparsity/linear_layer_sparsity": 0.8574244327339579, "compression/movement_sparsity/model_sparsity": 0.8279692657234642, "compression_loss": 94.66641235351562, "distillation_loss": 4.504405975341797, "epoch": 3.56, "learning_rate": 3.579412040950503e-05, "loss": 98.7681, "step": 4208, "task_loss": 2.0915000438690186 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8886199735103572, "compression/movement_sparsity/importance_threshold": -0.0007800761697324844, "compression/movement_sparsity/linear_layer_sparsity": 0.8576295999623006, "compression/movement_sparsity/model_sparsity": 0.8281673848263451, "compression_loss": 94.68672180175781, "distillation_loss": 4.816781997680664, "epoch": 3.56, "learning_rate": 3.5789424250962715e-05, "loss": 99.2691, "step": 4209, "task_loss": 3.3535447120666504 }, { "compression/movement_sparsity/importance_regularization_factor": 0.888815377021013, "compression/movement_sparsity/importance_threshold": -0.0007787076153610338, "compression/movement_sparsity/linear_layer_sparsity": 0.8577477803877396, "compression/movement_sparsity/model_sparsity": 0.8282815053906047, "compression_loss": 94.70698547363281, "distillation_loss": 4.467840671539307, "epoch": 3.56, "learning_rate": 3.57847280924204e-05, "loss": 99.0582, "step": 4210, "task_loss": 1.770585536956787 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8890105518558485, "compression/movement_sparsity/importance_threshold": -0.0007773406625743882, "compression/movement_sparsity/linear_layer_sparsity": 0.8577616481947001, "compression/movement_sparsity/model_sparsity": 0.8282948967957339, "compression_loss": 94.7271957397461, "distillation_loss": 4.68893575668335, "epoch": 3.56, "learning_rate": 3.578003193387809e-05, "loss": 99.6146, "step": 4211, "task_loss": 3.1243414878845215 }, { "compression/movement_sparsity/importance_regularization_factor": 0.889205498148749, "compression/movement_sparsity/importance_threshold": -0.0007759753104348505, "compression/movement_sparsity/linear_layer_sparsity": 0.8576759015052307, "compression/movement_sparsity/model_sparsity": 0.8282120957688353, "compression_loss": 94.74744415283203, "distillation_loss": 4.043570041656494, "epoch": 3.56, "learning_rate": 3.577533577533578e-05, "loss": 98.8245, "step": 4212, "task_loss": 1.9759941101074219 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8894002160335998, "compression/movement_sparsity/importance_threshold": -0.0007746115580047246, "compression/movement_sparsity/linear_layer_sparsity": 0.8578332885938563, "compression/movement_sparsity/model_sparsity": 0.8283640761267874, "compression_loss": 94.76761627197266, "distillation_loss": 3.9328396320343018, "epoch": 3.56, "learning_rate": 3.577063961679347e-05, "loss": 98.1057, "step": 4213, "task_loss": 1.4132288694381714 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8895947056442857, "compression/movement_sparsity/importance_threshold": -0.0007732494043463153, "compression/movement_sparsity/linear_layer_sparsity": 0.8578660204340168, "compression/movement_sparsity/model_sparsity": 0.8283956835275434, "compression_loss": 94.78777313232422, "distillation_loss": 3.49436092376709, "epoch": 3.56, "learning_rate": 3.576594345825115e-05, "loss": 98.2827, "step": 4214, "task_loss": 1.6415598392486572 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8897889671146918, "compression/movement_sparsity/importance_threshold": -0.0007718888485219264, "compression/movement_sparsity/linear_layer_sparsity": 0.8580190552014555, "compression/movement_sparsity/model_sparsity": 0.8285434610799306, "compression_loss": 94.80792236328125, "distillation_loss": 6.025250434875488, "epoch": 3.56, "learning_rate": 3.576124729970884e-05, "loss": 100.0529, "step": 4215, "task_loss": 3.3867642879486084 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8899830005787037, "compression/movement_sparsity/importance_threshold": -0.0007705298895938602, "compression/movement_sparsity/linear_layer_sparsity": 0.8581049330567689, "compression/movement_sparsity/model_sparsity": 0.8286263887667229, "compression_loss": 94.8280258178711, "distillation_loss": 4.967032432556152, "epoch": 3.56, "learning_rate": 3.5756551141166526e-05, "loss": 98.7806, "step": 4216, "task_loss": 2.105295419692993 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8901768061702062, "compression/movement_sparsity/importance_threshold": -0.0007691725266244231, "compression/movement_sparsity/linear_layer_sparsity": 0.8582708697735896, "compression/movement_sparsity/model_sparsity": 0.8287866250468396, "compression_loss": 94.84805297851562, "distillation_loss": 3.506471633911133, "epoch": 3.56, "learning_rate": 3.575185498262422e-05, "loss": 98.3469, "step": 4217, "task_loss": 2.086794376373291 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8903703840230844, "compression/movement_sparsity/importance_threshold": -0.0007678167586759183, "compression/movement_sparsity/linear_layer_sparsity": 0.8584046708586317, "compression/movement_sparsity/model_sparsity": 0.8289158296529903, "compression_loss": 94.86821746826172, "distillation_loss": 5.5784807205200195, "epoch": 3.57, "learning_rate": 3.57471588240819e-05, "loss": 99.3624, "step": 4218, "task_loss": 2.660674810409546 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8905637342712236, "compression/movement_sparsity/importance_threshold": -0.0007664625848106494, "compression/movement_sparsity/linear_layer_sparsity": 0.8584049331903196, "compression/movement_sparsity/model_sparsity": 0.8289160829727777, "compression_loss": 94.88831329345703, "distillation_loss": 3.4981958866119385, "epoch": 3.57, "learning_rate": 3.574246266553959e-05, "loss": 98.4617, "step": 4219, "task_loss": 2.036100387573242 }, { "compression/movement_sparsity/importance_regularization_factor": 0.890756857048509, "compression/movement_sparsity/importance_threshold": -0.0007651100040909197, "compression/movement_sparsity/linear_layer_sparsity": 0.8584708142165077, "compression/movement_sparsity/model_sparsity": 0.8289797007830425, "compression_loss": 94.90825653076172, "distillation_loss": 4.332541465759277, "epoch": 3.57, "learning_rate": 3.573776650699728e-05, "loss": 99.2355, "step": 4220, "task_loss": 3.7314934730529785 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8909497524888255, "compression/movement_sparsity/importance_threshold": -0.0007637590155790347, "compression/movement_sparsity/linear_layer_sparsity": 0.8586050564957524, "compression/movement_sparsity/model_sparsity": 0.8291093314270175, "compression_loss": 94.92828369140625, "distillation_loss": 3.470811367034912, "epoch": 3.57, "learning_rate": 3.5733070348454965e-05, "loss": 98.4727, "step": 4221, "task_loss": 1.0225696563720703 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8911424207260584, "compression/movement_sparsity/importance_threshold": -0.0007624096183372975, "compression/movement_sparsity/linear_layer_sparsity": 0.8586960498189816, "compression/movement_sparsity/model_sparsity": 0.8291971988496656, "compression_loss": 94.9482650756836, "distillation_loss": 4.0635986328125, "epoch": 3.57, "learning_rate": 3.572837418991266e-05, "loss": 98.9561, "step": 4222, "task_loss": 3.2986013889312744 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8913348618940928, "compression/movement_sparsity/importance_threshold": -0.000761061811428012, "compression/movement_sparsity/linear_layer_sparsity": 0.8587614896509672, "compression/movement_sparsity/model_sparsity": 0.8292603906221061, "compression_loss": 94.9681396484375, "distillation_loss": 3.7003707885742188, "epoch": 3.57, "learning_rate": 3.572367803137034e-05, "loss": 98.576, "step": 4223, "task_loss": 1.8450802564620972 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8915270761268139, "compression/movement_sparsity/importance_threshold": -0.000759715593913482, "compression/movement_sparsity/linear_layer_sparsity": 0.8589075845528418, "compression/movement_sparsity/model_sparsity": 0.8294014667146608, "compression_loss": 94.98812866210938, "distillation_loss": 4.547817707061768, "epoch": 3.57, "learning_rate": 3.571898187282803e-05, "loss": 99.5571, "step": 4224, "task_loss": 4.161828994750977 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8917190635581068, "compression/movement_sparsity/importance_threshold": -0.0007583709648560123, "compression/movement_sparsity/linear_layer_sparsity": 0.8590175969234503, "compression/movement_sparsity/model_sparsity": 0.8295076998219012, "compression_loss": 95.00800323486328, "distillation_loss": 5.438022136688232, "epoch": 3.57, "learning_rate": 3.571428571428572e-05, "loss": 99.3561, "step": 4225, "task_loss": 2.476797103881836 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8919108243218565, "compression/movement_sparsity/importance_threshold": -0.0007570279233179069, "compression/movement_sparsity/linear_layer_sparsity": 0.8591231258070277, "compression/movement_sparsity/model_sparsity": 0.829609603463683, "compression_loss": 95.02791595458984, "distillation_loss": 2.6253061294555664, "epoch": 3.57, "learning_rate": 3.57095895557434e-05, "loss": 99.2022, "step": 4226, "task_loss": 1.1413633823394775 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8921023585519484, "compression/movement_sparsity/importance_threshold": -0.0007556864683614687, "compression/movement_sparsity/linear_layer_sparsity": 0.8592623881608469, "compression/movement_sparsity/model_sparsity": 0.8297440817272274, "compression_loss": 95.04777526855469, "distillation_loss": 2.835491418838501, "epoch": 3.57, "learning_rate": 3.570489339720109e-05, "loss": 98.7802, "step": 4227, "task_loss": 2.6953446865081787 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8922936663822675, "compression/movement_sparsity/importance_threshold": -0.0007543465990490026, "compression/movement_sparsity/linear_layer_sparsity": 0.8593862206417453, "compression/movement_sparsity/model_sparsity": 0.8298636601814536, "compression_loss": 95.06761932373047, "distillation_loss": 3.5182652473449707, "epoch": 3.57, "learning_rate": 3.5700197238658776e-05, "loss": 98.9473, "step": 4228, "task_loss": 2.361135959625244 }, { "compression/movement_sparsity/importance_regularization_factor": 0.892484747946699, "compression/movement_sparsity/importance_threshold": -0.0007530083144428124, "compression/movement_sparsity/linear_layer_sparsity": 0.8595354754480434, "compression/movement_sparsity/model_sparsity": 0.8300077876259939, "compression_loss": 95.0874252319336, "distillation_loss": 4.198307037353516, "epoch": 3.57, "learning_rate": 3.569550108011647e-05, "loss": 99.7338, "step": 4229, "task_loss": 2.732495069503784 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8926756033791279, "compression/movement_sparsity/importance_threshold": -0.000751671613605202, "compression/movement_sparsity/linear_layer_sparsity": 0.8595954420870842, "compression/movement_sparsity/model_sparsity": 0.8300656942265047, "compression_loss": 95.10716247558594, "distillation_loss": 6.82596492767334, "epoch": 3.58, "learning_rate": 3.5690804921574155e-05, "loss": 101.299, "step": 4230, "task_loss": 3.593867540359497 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8928662328134394, "compression/movement_sparsity/importance_threshold": -0.0007503364955984753, "compression/movement_sparsity/linear_layer_sparsity": 0.8597991426428077, "compression/movement_sparsity/model_sparsity": 0.8302623970414829, "compression_loss": 95.126953125, "distillation_loss": 3.7819485664367676, "epoch": 3.58, "learning_rate": 3.568610876303184e-05, "loss": 99.0461, "step": 4231, "task_loss": 1.9397151470184326 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8930566363835187, "compression/movement_sparsity/importance_threshold": -0.0007490029594849363, "compression/movement_sparsity/linear_layer_sparsity": 0.8599317036144155, "compression/movement_sparsity/model_sparsity": 0.8303904041359107, "compression_loss": 95.14668273925781, "distillation_loss": 4.32335090637207, "epoch": 3.58, "learning_rate": 3.568141260448953e-05, "loss": 99.2191, "step": 4232, "task_loss": 2.7963733673095703 }, { "compression/movement_sparsity/importance_regularization_factor": 0.893246814223251, "compression/movement_sparsity/importance_threshold": -0.0007476710043268879, "compression/movement_sparsity/linear_layer_sparsity": 0.8601192707713277, "compression/movement_sparsity/model_sparsity": 0.8305715277839588, "compression_loss": 95.16647338867188, "distillation_loss": 3.908141613006592, "epoch": 3.58, "learning_rate": 3.5676716445947214e-05, "loss": 98.8688, "step": 4233, "task_loss": 1.5860579013824463 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8934367664665213, "compression/movement_sparsity/importance_threshold": -0.0007463406291866375, "compression/movement_sparsity/linear_layer_sparsity": 0.8603196921809513, "compression/movement_sparsity/model_sparsity": 0.8307650641015935, "compression_loss": 95.18608093261719, "distillation_loss": 3.044175148010254, "epoch": 3.58, "learning_rate": 3.567202028740491e-05, "loss": 98.6616, "step": 4234, "task_loss": 2.2728664875030518 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8936264932472149, "compression/movement_sparsity/importance_threshold": -0.0007450118331264848, "compression/movement_sparsity/linear_layer_sparsity": 0.8605508063980696, "compression/movement_sparsity/model_sparsity": 0.8309882388343632, "compression_loss": 95.20569610595703, "distillation_loss": 3.559412956237793, "epoch": 3.58, "learning_rate": 3.5667324128862594e-05, "loss": 98.8096, "step": 4235, "task_loss": 1.5963290929794312 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8938159946992167, "compression/movement_sparsity/importance_threshold": -0.000743684615208736, "compression/movement_sparsity/linear_layer_sparsity": 0.8607068460557524, "compression/movement_sparsity/model_sparsity": 0.8311389180497707, "compression_loss": 95.22530364990234, "distillation_loss": 4.1290998458862305, "epoch": 3.58, "learning_rate": 3.566262797032028e-05, "loss": 99.0606, "step": 4236, "task_loss": 2.407137632369995 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8940052709564119, "compression/movement_sparsity/importance_threshold": -0.0007423589744956962, "compression/movement_sparsity/linear_layer_sparsity": 0.8607349632430378, "compression/movement_sparsity/model_sparsity": 0.8311660693251741, "compression_loss": 95.24494934082031, "distillation_loss": 4.583278179168701, "epoch": 3.58, "learning_rate": 3.5657931811777967e-05, "loss": 99.8632, "step": 4237, "task_loss": 2.260117769241333 }, { "compression/movement_sparsity/importance_regularization_factor": 0.894194322152686, "compression/movement_sparsity/importance_threshold": -0.0007410349100496656, "compression/movement_sparsity/linear_layer_sparsity": 0.8609341445392272, "compression/movement_sparsity/model_sparsity": 0.8313584081310862, "compression_loss": 95.26454162597656, "distillation_loss": 3.7170071601867676, "epoch": 3.58, "learning_rate": 3.565323565323565e-05, "loss": 99.9107, "step": 4238, "task_loss": 1.7686738967895508 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8943831484219236, "compression/movement_sparsity/importance_threshold": -0.0007397124209329534, "compression/movement_sparsity/linear_layer_sparsity": 0.8610274153784749, "compression/movement_sparsity/model_sparsity": 0.8314484748300711, "compression_loss": 95.28411865234375, "distillation_loss": 4.712100982666016, "epoch": 3.58, "learning_rate": 3.5648539494693346e-05, "loss": 99.8103, "step": 4239, "task_loss": 2.466038942337036 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8945717498980102, "compression/movement_sparsity/importance_threshold": -0.0007383915062078592, "compression/movement_sparsity/linear_layer_sparsity": 0.861216950023047, "compression/movement_sparsity/model_sparsity": 0.8316314983765253, "compression_loss": 95.3037109375, "distillation_loss": 5.715935707092285, "epoch": 3.58, "learning_rate": 3.5643843336151026e-05, "loss": 100.0788, "step": 4240, "task_loss": 3.6584253311157227 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8947601267148309, "compression/movement_sparsity/importance_threshold": -0.0007370721649366885, "compression/movement_sparsity/linear_layer_sparsity": 0.861314370472632, "compression/movement_sparsity/model_sparsity": 0.8317255721339667, "compression_loss": 95.3232192993164, "distillation_loss": 5.758817672729492, "epoch": 3.58, "learning_rate": 3.563914717760872e-05, "loss": 100.1407, "step": 4241, "task_loss": 2.5723631381988525 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8949482790062707, "compression/movement_sparsity/importance_threshold": -0.0007357543961817453, "compression/movement_sparsity/linear_layer_sparsity": 0.861451677262959, "compression/movement_sparsity/model_sparsity": 0.8318581620136408, "compression_loss": 95.34275817871094, "distillation_loss": 4.0753703117370605, "epoch": 3.59, "learning_rate": 3.5634451019066405e-05, "loss": 99.5099, "step": 4242, "task_loss": 2.9933478832244873 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8951362069062149, "compression/movement_sparsity/importance_threshold": -0.0007344381990053335, "compression/movement_sparsity/linear_layer_sparsity": 0.8616367880413381, "compression/movement_sparsity/model_sparsity": 0.8320369136673152, "compression_loss": 95.36219787597656, "distillation_loss": 4.497125625610352, "epoch": 3.59, "learning_rate": 3.56297548605241e-05, "loss": 98.8599, "step": 4243, "task_loss": 2.4475936889648438 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8953239105485484, "compression/movement_sparsity/importance_threshold": -0.0007331235724697587, "compression/movement_sparsity/linear_layer_sparsity": 0.8617707799130623, "compression/movement_sparsity/model_sparsity": 0.8321663025060385, "compression_loss": 95.38167572021484, "distillation_loss": 4.050002574920654, "epoch": 3.59, "learning_rate": 3.562505870198178e-05, "loss": 99.796, "step": 4244, "task_loss": 3.2542877197265625 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8955113900671567, "compression/movement_sparsity/importance_threshold": -0.0007318105156373213, "compression/movement_sparsity/linear_layer_sparsity": 0.8618737331764303, "compression/movement_sparsity/model_sparsity": 0.8322657190080887, "compression_loss": 95.4010238647461, "distillation_loss": 4.273817539215088, "epoch": 3.59, "learning_rate": 3.5620362543439464e-05, "loss": 99.6139, "step": 4245, "task_loss": 1.3939132690429688 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8956986455959246, "compression/movement_sparsity/importance_threshold": -0.0007304990275703287, "compression/movement_sparsity/linear_layer_sparsity": 0.8620145695203776, "compression/movement_sparsity/model_sparsity": 0.8324017171903579, "compression_loss": 95.42040252685547, "distillation_loss": 3.3786368370056152, "epoch": 3.59, "learning_rate": 3.561566638489716e-05, "loss": 99.016, "step": 4246, "task_loss": 1.950330138206482 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8958856772687375, "compression/movement_sparsity/importance_threshold": -0.0007291891073310831, "compression/movement_sparsity/linear_layer_sparsity": 0.8620702673074047, "compression/movement_sparsity/model_sparsity": 0.8324555015870543, "compression_loss": 95.43971252441406, "distillation_loss": 5.5003581047058105, "epoch": 3.59, "learning_rate": 3.5610970226354844e-05, "loss": 99.241, "step": 4247, "task_loss": 2.521172046661377 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8960724852194804, "compression/movement_sparsity/importance_threshold": -0.0007278807539818884, "compression/movement_sparsity/linear_layer_sparsity": 0.8622166364651349, "compression/movement_sparsity/model_sparsity": 0.8325968425139322, "compression_loss": 95.45901489257812, "distillation_loss": 4.9145660400390625, "epoch": 3.59, "learning_rate": 3.560627406781253e-05, "loss": 99.5785, "step": 4248, "task_loss": 2.9931321144104004 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8962590695820384, "compression/movement_sparsity/importance_threshold": -0.0007265739665850493, "compression/movement_sparsity/linear_layer_sparsity": 0.8623422933436816, "compression/movement_sparsity/model_sparsity": 0.832718182692135, "compression_loss": 95.47824096679688, "distillation_loss": 4.132366180419922, "epoch": 3.59, "learning_rate": 3.5601577909270216e-05, "loss": 99.7276, "step": 4249, "task_loss": 3.036210775375366 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8964454304902967, "compression/movement_sparsity/importance_threshold": -0.0007252687442028706, "compression/movement_sparsity/linear_layer_sparsity": 0.8623711856018633, "compression/movement_sparsity/model_sparsity": 0.8327460824123653, "compression_loss": 95.4974594116211, "distillation_loss": 3.266859531402588, "epoch": 3.59, "learning_rate": 3.559688175072791e-05, "loss": 100.1496, "step": 4250, "task_loss": 1.7120857238769531 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8966315680781406, "compression/movement_sparsity/importance_threshold": -0.0007239650858976536, "compression/movement_sparsity/linear_layer_sparsity": 0.862508110818826, "compression/movement_sparsity/model_sparsity": 0.832878303826894, "compression_loss": 95.51670837402344, "distillation_loss": 4.251181125640869, "epoch": 3.59, "learning_rate": 3.5592185592185596e-05, "loss": 99.7999, "step": 4251, "task_loss": 2.348019599914551 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8968174824794549, "compression/movement_sparsity/importance_threshold": -0.0007226629907317048, "compression/movement_sparsity/linear_layer_sparsity": 0.8627675449340794, "compression/movement_sparsity/model_sparsity": 0.8331288255821758, "compression_loss": 95.53594970703125, "distillation_loss": 4.091492652893066, "epoch": 3.59, "learning_rate": 3.558748943364328e-05, "loss": 100.3227, "step": 4252, "task_loss": 2.108844757080078 }, { "compression/movement_sparsity/importance_regularization_factor": 0.897003173828125, "compression/movement_sparsity/importance_threshold": -0.0007213624577673272, "compression/movement_sparsity/linear_layer_sparsity": 0.8629095856189578, "compression/movement_sparsity/model_sparsity": 0.8332659867325604, "compression_loss": 95.55511474609375, "distillation_loss": 4.5245771408081055, "epoch": 3.59, "learning_rate": 3.558279327510097e-05, "loss": 100.1262, "step": 4253, "task_loss": 4.600433349609375 }, { "compression/movement_sparsity/importance_regularization_factor": 0.897188642258036, "compression/movement_sparsity/importance_threshold": -0.0007200634860668248, "compression/movement_sparsity/linear_layer_sparsity": 0.8630874226550791, "compression/movement_sparsity/model_sparsity": 0.8334377145194002, "compression_loss": 95.57428741455078, "distillation_loss": 3.55635404586792, "epoch": 3.6, "learning_rate": 3.5578097116558655e-05, "loss": 99.884, "step": 4254, "task_loss": 1.478051781654358 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8973738879030729, "compression/movement_sparsity/importance_threshold": -0.0007187660746925022, "compression/movement_sparsity/linear_layer_sparsity": 0.8632548975895248, "compression/movement_sparsity/model_sparsity": 0.8335994361746345, "compression_loss": 95.59339904785156, "distillation_loss": 4.404010772705078, "epoch": 3.6, "learning_rate": 3.557340095801635e-05, "loss": 100.5491, "step": 4255, "task_loss": 1.9814642667770386 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8975589108971211, "compression/movement_sparsity/importance_threshold": -0.0007174702227066617, "compression/movement_sparsity/linear_layer_sparsity": 0.8633225195441877, "compression/movement_sparsity/model_sparsity": 0.8336647351071254, "compression_loss": 95.61253356933594, "distillation_loss": 4.804727077484131, "epoch": 3.6, "learning_rate": 3.5568704799474034e-05, "loss": 99.8506, "step": 4256, "task_loss": 3.141603708267212 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8977437113740656, "compression/movement_sparsity/importance_threshold": -0.000716175929171608, "compression/movement_sparsity/linear_layer_sparsity": 0.8633619408423919, "compression/movement_sparsity/model_sparsity": 0.8337028021624621, "compression_loss": 95.63155364990234, "distillation_loss": 4.1111602783203125, "epoch": 3.6, "learning_rate": 3.556400864093172e-05, "loss": 99.8709, "step": 4257, "task_loss": 2.270627737045288 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8979282894677915, "compression/movement_sparsity/importance_threshold": -0.0007148831931496468, "compression/movement_sparsity/linear_layer_sparsity": 0.8634873950100888, "compression/movement_sparsity/model_sparsity": 0.8338239465935564, "compression_loss": 95.65061950683594, "distillation_loss": 4.5870585441589355, "epoch": 3.6, "learning_rate": 3.555931248238941e-05, "loss": 99.6647, "step": 4258, "task_loss": 2.215709924697876 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8981126453121838, "compression/movement_sparsity/importance_threshold": -0.0007135920137030811, "compression/movement_sparsity/linear_layer_sparsity": 0.8636375441289595, "compression/movement_sparsity/model_sparsity": 0.8339689376282813, "compression_loss": 95.66960906982422, "distillation_loss": 3.4400198459625244, "epoch": 3.6, "learning_rate": 3.555461632384709e-05, "loss": 99.6717, "step": 4259, "task_loss": 1.9292771816253662 }, { "compression/movement_sparsity/importance_regularization_factor": 0.898296779041128, "compression/movement_sparsity/importance_threshold": -0.000712302389894213, "compression/movement_sparsity/linear_layer_sparsity": 0.8637799902355376, "compression/movement_sparsity/model_sparsity": 0.8341064902728829, "compression_loss": 95.68865203857422, "distillation_loss": 3.7589597702026367, "epoch": 3.6, "learning_rate": 3.5549920165304786e-05, "loss": 100.173, "step": 4260, "task_loss": 2.6841092109680176 }, { "compression/movement_sparsity/importance_regularization_factor": 0.898480690788509, "compression/movement_sparsity/importance_threshold": -0.0007110143207853491, "compression/movement_sparsity/linear_layer_sparsity": 0.8638688968294306, "compression/movement_sparsity/model_sparsity": 0.834192342651767, "compression_loss": 95.70760345458984, "distillation_loss": 3.5339272022247314, "epoch": 3.6, "learning_rate": 3.5545224006762466e-05, "loss": 99.4745, "step": 4261, "task_loss": 2.308706283569336 }, { "compression/movement_sparsity/importance_regularization_factor": 0.898664380688212, "compression/movement_sparsity/importance_threshold": -0.0007097278054387923, "compression/movement_sparsity/linear_layer_sparsity": 0.8639742110779906, "compression/movement_sparsity/model_sparsity": 0.8342940390319044, "compression_loss": 95.72660827636719, "distillation_loss": 4.77067232131958, "epoch": 3.6, "learning_rate": 3.554052784822016e-05, "loss": 100.0226, "step": 4262, "task_loss": 2.272444725036621 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8988478488741221, "compression/movement_sparsity/importance_threshold": -0.0007084428429168467, "compression/movement_sparsity/linear_layer_sparsity": 0.864079739961568, "compression/movement_sparsity/model_sparsity": 0.834395942673686, "compression_loss": 95.74553680419922, "distillation_loss": 4.050790309906006, "epoch": 3.6, "learning_rate": 3.5535831689677845e-05, "loss": 99.8489, "step": 4263, "task_loss": 2.708509683609009 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8990310954801245, "compression/movement_sparsity/importance_threshold": -0.000707159432281816, "compression/movement_sparsity/linear_layer_sparsity": 0.864218286865329, "compression/movement_sparsity/model_sparsity": 0.8345297300650828, "compression_loss": 95.76445770263672, "distillation_loss": 3.9858670234680176, "epoch": 3.6, "learning_rate": 3.553113553113553e-05, "loss": 100.1182, "step": 4264, "task_loss": 1.1736524105072021 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8992141206401043, "compression/movement_sparsity/importance_threshold": -0.0007058775725960042, "compression/movement_sparsity/linear_layer_sparsity": 0.8643814452510905, "compression/movement_sparsity/model_sparsity": 0.8346872834583595, "compression_loss": 95.78329467773438, "distillation_loss": 4.853404521942139, "epoch": 3.6, "learning_rate": 3.5526439372593225e-05, "loss": 100.4709, "step": 4265, "task_loss": 2.4195873737335205 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8993969244879467, "compression/movement_sparsity/importance_threshold": -0.0007045972629217152, "compression/movement_sparsity/linear_layer_sparsity": 0.8645093796456557, "compression/movement_sparsity/model_sparsity": 0.8348108229128991, "compression_loss": 95.80215454101562, "distillation_loss": 4.108983039855957, "epoch": 3.61, "learning_rate": 3.5521743214050904e-05, "loss": 100.4064, "step": 4266, "task_loss": 1.4528888463974 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8995795071575368, "compression/movement_sparsity/importance_threshold": -0.0007033185023212528, "compression/movement_sparsity/linear_layer_sparsity": 0.864595329045975, "compression/movement_sparsity/model_sparsity": 0.8348938196869062, "compression_loss": 95.82109069824219, "distillation_loss": 4.723228454589844, "epoch": 3.61, "learning_rate": 3.55170470555086e-05, "loss": 100.7143, "step": 4267, "task_loss": 3.076694965362549 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8997618687827597, "compression/movement_sparsity/importance_threshold": -0.0007020412898569219, "compression/movement_sparsity/linear_layer_sparsity": 0.8648336454603454, "compression/movement_sparsity/model_sparsity": 0.8351239491992959, "compression_loss": 95.83995819091797, "distillation_loss": 3.680413246154785, "epoch": 3.61, "learning_rate": 3.5512350896966284e-05, "loss": 99.8117, "step": 4268, "task_loss": 2.0536088943481445 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8999440094975006, "compression/movement_sparsity/importance_threshold": -0.0007007656245910255, "compression/movement_sparsity/linear_layer_sparsity": 0.8648962473404337, "compression/movement_sparsity/model_sparsity": 0.8351844005122172, "compression_loss": 95.85875701904297, "distillation_loss": 5.204758644104004, "epoch": 3.61, "learning_rate": 3.550765473842397e-05, "loss": 100.0336, "step": 4269, "task_loss": 2.403553009033203 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9001259294356445, "compression/movement_sparsity/importance_threshold": -0.00069949150558587, "compression/movement_sparsity/linear_layer_sparsity": 0.8650124006573746, "compression/movement_sparsity/model_sparsity": 0.8352965636053918, "compression_loss": 95.87752532958984, "distillation_loss": 4.010772705078125, "epoch": 3.61, "learning_rate": 3.5502958579881656e-05, "loss": 100.0025, "step": 4270, "task_loss": 1.8188772201538086 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9003076287310768, "compression/movement_sparsity/importance_threshold": -0.0006982189319037551, "compression/movement_sparsity/linear_layer_sparsity": 0.8651424933262819, "compression/movement_sparsity/model_sparsity": 0.8354221871909103, "compression_loss": 95.89631652832031, "distillation_loss": 3.156705856323242, "epoch": 3.61, "learning_rate": 3.549826242133934e-05, "loss": 100.1325, "step": 4271, "task_loss": 2.598294258117676 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9004891075176825, "compression/movement_sparsity/importance_threshold": -0.0006969479026069881, "compression/movement_sparsity/linear_layer_sparsity": 0.8652795497090885, "compression/movement_sparsity/model_sparsity": 0.8355545352653327, "compression_loss": 95.91511535644531, "distillation_loss": 4.084255218505859, "epoch": 3.61, "learning_rate": 3.5493566262797036e-05, "loss": 99.616, "step": 4272, "task_loss": 3.53688383102417 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9006703659293467, "compression/movement_sparsity/importance_threshold": -0.0006956784167578721, "compression/movement_sparsity/linear_layer_sparsity": 0.8653527164017021, "compression/movement_sparsity/model_sparsity": 0.835625188456968, "compression_loss": 95.933837890625, "distillation_loss": 4.723835468292236, "epoch": 3.61, "learning_rate": 3.548887010425472e-05, "loss": 100.3172, "step": 4273, "task_loss": 2.9156343936920166 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9008514040999546, "compression/movement_sparsity/importance_threshold": -0.00069441047341871, "compression/movement_sparsity/linear_layer_sparsity": 0.8653602643998156, "compression/movement_sparsity/model_sparsity": 0.8356324771581259, "compression_loss": 95.95258331298828, "distillation_loss": 4.618986129760742, "epoch": 3.61, "learning_rate": 3.548417394571241e-05, "loss": 100.2825, "step": 4274, "task_loss": 2.5440514087677 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9010322221633913, "compression/movement_sparsity/importance_threshold": -0.0006931440716518075, "compression/movement_sparsity/linear_layer_sparsity": 0.8655083864101883, "compression/movement_sparsity/model_sparsity": 0.8357755107217658, "compression_loss": 95.97126007080078, "distillation_loss": 3.871387004852295, "epoch": 3.61, "learning_rate": 3.5479477787170095e-05, "loss": 99.8351, "step": 4275, "task_loss": 2.873246908187866 }, { "compression/movement_sparsity/importance_regularization_factor": 0.901212820253542, "compression/movement_sparsity/importance_threshold": -0.0006918792105194676, "compression/movement_sparsity/linear_layer_sparsity": 0.8656134860237308, "compression/movement_sparsity/model_sparsity": 0.8358769998402589, "compression_loss": 95.99002075195312, "distillation_loss": 3.659684896469116, "epoch": 3.61, "learning_rate": 3.547478162862778e-05, "loss": 99.8265, "step": 4276, "task_loss": 1.4133249521255493 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9013931985042917, "compression/movement_sparsity/importance_threshold": -0.0006906158890839951, "compression/movement_sparsity/linear_layer_sparsity": 0.8657292100706369, "compression/movement_sparsity/model_sparsity": 0.8359887484101448, "compression_loss": 96.0086669921875, "distillation_loss": 5.459488868713379, "epoch": 3.61, "learning_rate": 3.5470085470085474e-05, "loss": 99.9944, "step": 4277, "task_loss": 2.44586181640625 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9015733570495258, "compression/movement_sparsity/importance_threshold": -0.0006893541064076913, "compression/movement_sparsity/linear_layer_sparsity": 0.865974967165612, "compression/movement_sparsity/model_sparsity": 0.8362260629928704, "compression_loss": 96.02733612060547, "distillation_loss": 3.229130744934082, "epoch": 3.62, "learning_rate": 3.5465389311543154e-05, "loss": 100.0239, "step": 4278, "task_loss": 2.019221782684326 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9017532960231291, "compression/movement_sparsity/importance_threshold": -0.0006880938615528645, "compression/movement_sparsity/linear_layer_sparsity": 0.8660275765932214, "compression/movement_sparsity/model_sparsity": 0.8362768651247958, "compression_loss": 96.04586791992188, "distillation_loss": 3.4382500648498535, "epoch": 3.62, "learning_rate": 3.546069315300085e-05, "loss": 99.7238, "step": 4279, "task_loss": 2.159698009490967 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9019330155589871, "compression/movement_sparsity/importance_threshold": -0.000686835153581815, "compression/movement_sparsity/linear_layer_sparsity": 0.8661498350839919, "compression/movement_sparsity/model_sparsity": 0.8363949236602973, "compression_loss": 96.06439208984375, "distillation_loss": 4.059858798980713, "epoch": 3.62, "learning_rate": 3.5455996994458533e-05, "loss": 100.2131, "step": 4280, "task_loss": 1.8517813682556152 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9021125157909847, "compression/movement_sparsity/importance_threshold": -0.0006855779815568485, "compression/movement_sparsity/linear_layer_sparsity": 0.8662729759631674, "compression/movement_sparsity/model_sparsity": 0.8365138342714475, "compression_loss": 96.08303833007812, "distillation_loss": 5.325155258178711, "epoch": 3.62, "learning_rate": 3.5451300835916227e-05, "loss": 100.6121, "step": 4281, "task_loss": 2.363600492477417 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9022917968530071, "compression/movement_sparsity/importance_threshold": -0.0006843223445402698, "compression/movement_sparsity/linear_layer_sparsity": 0.8664202752059733, "compression/movement_sparsity/model_sparsity": 0.8366560733321174, "compression_loss": 96.1015625, "distillation_loss": 5.921926975250244, "epoch": 3.62, "learning_rate": 3.544660467737391e-05, "loss": 100.867, "step": 4282, "task_loss": 2.688180446624756 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9024708588789395, "compression/movement_sparsity/importance_threshold": -0.0006830682415943801, "compression/movement_sparsity/linear_layer_sparsity": 0.8665083828806347, "compression/movement_sparsity/model_sparsity": 0.8367411542371034, "compression_loss": 96.1201400756836, "distillation_loss": 4.978620529174805, "epoch": 3.62, "learning_rate": 3.544190851883159e-05, "loss": 100.4941, "step": 4283, "task_loss": 2.4804446697235107 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9026497020026669, "compression/movement_sparsity/importance_threshold": -0.000681815671781486, "compression/movement_sparsity/linear_layer_sparsity": 0.8667356813641095, "compression/movement_sparsity/model_sparsity": 0.8369606443184189, "compression_loss": 96.13858032226562, "distillation_loss": 5.0138092041015625, "epoch": 3.62, "learning_rate": 3.5437212360289286e-05, "loss": 100.8471, "step": 4284, "task_loss": 3.699549913406372 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9028283263580746, "compression/movement_sparsity/importance_threshold": -0.0006805646341638896, "compression/movement_sparsity/linear_layer_sparsity": 0.8667421204146328, "compression/movement_sparsity/model_sparsity": 0.836966862167748, "compression_loss": 96.15709686279297, "distillation_loss": 3.905506134033203, "epoch": 3.62, "learning_rate": 3.543251620174697e-05, "loss": 99.9716, "step": 4285, "task_loss": 2.4717812538146973 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9030067320790476, "compression/movement_sparsity/importance_threshold": -0.0006793151278038975, "compression/movement_sparsity/linear_layer_sparsity": 0.8668749317937611, "compression/movement_sparsity/model_sparsity": 0.8370951110674275, "compression_loss": 96.17554473876953, "distillation_loss": 3.2638678550720215, "epoch": 3.62, "learning_rate": 3.5427820043204665e-05, "loss": 100.0794, "step": 4286, "task_loss": 2.090071439743042 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9031849192994712, "compression/movement_sparsity/importance_threshold": -0.0006780671517638108, "compression/movement_sparsity/linear_layer_sparsity": 0.8669994201038795, "compression/movement_sparsity/model_sparsity": 0.8372153228211225, "compression_loss": 96.19392395019531, "distillation_loss": 2.7314634323120117, "epoch": 3.62, "learning_rate": 3.5423123884662345e-05, "loss": 99.3524, "step": 4287, "task_loss": 1.4440038204193115 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9033628881532304, "compression/movement_sparsity/importance_threshold": -0.0006768207051059344, "compression/movement_sparsity/linear_layer_sparsity": 0.8671196634103195, "compression/movement_sparsity/model_sparsity": 0.8373314354000747, "compression_loss": 96.21239471435547, "distillation_loss": 4.933556079864502, "epoch": 3.62, "learning_rate": 3.541842772612004e-05, "loss": 100.0904, "step": 4288, "task_loss": 1.985803484916687 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9035406387742103, "compression/movement_sparsity/importance_threshold": -0.000675575786892574, "compression/movement_sparsity/linear_layer_sparsity": 0.8671821698970668, "compression/movement_sparsity/model_sparsity": 0.8373917945967096, "compression_loss": 96.230712890625, "distillation_loss": 3.775048017501831, "epoch": 3.63, "learning_rate": 3.5413731567577724e-05, "loss": 100.6447, "step": 4289, "task_loss": 2.8999719619750977 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9037181712962963, "compression/movement_sparsity/importance_threshold": -0.0006743323961860307, "compression/movement_sparsity/linear_layer_sparsity": 0.8672553604380157, "compression/movement_sparsity/model_sparsity": 0.8374624708174165, "compression_loss": 96.24906921386719, "distillation_loss": 3.9476237297058105, "epoch": 3.63, "learning_rate": 3.540903540903541e-05, "loss": 99.6322, "step": 4290, "task_loss": 2.2831263542175293 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9038954858533733, "compression/movement_sparsity/importance_threshold": -0.0006730905320486095, "compression/movement_sparsity/linear_layer_sparsity": 0.8672889865907488, "compression/movement_sparsity/model_sparsity": 0.8374949418083572, "compression_loss": 96.26739501953125, "distillation_loss": 4.304094314575195, "epoch": 3.63, "learning_rate": 3.54043392504931e-05, "loss": 100.6191, "step": 4291, "task_loss": 1.8310171365737915 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9040725825793265, "compression/movement_sparsity/importance_threshold": -0.000671850193542615, "compression/movement_sparsity/linear_layer_sparsity": 0.8673930249533718, "compression/movement_sparsity/model_sparsity": 0.8375954061331645, "compression_loss": 96.2857437133789, "distillation_loss": 3.959082841873169, "epoch": 3.63, "learning_rate": 3.539964309195078e-05, "loss": 100.2784, "step": 4292, "task_loss": 1.9013115167617798 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9042494616080411, "compression/movement_sparsity/importance_threshold": -0.000670611379730352, "compression/movement_sparsity/linear_layer_sparsity": 0.8675244531290542, "compression/movement_sparsity/model_sparsity": 0.837722319346692, "compression_loss": 96.30399322509766, "distillation_loss": 4.970921039581299, "epoch": 3.63, "learning_rate": 3.5394946933408476e-05, "loss": 100.9185, "step": 4293, "task_loss": 3.71980357170105 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9044261230734021, "compression/movement_sparsity/importance_threshold": -0.0006693740896741227, "compression/movement_sparsity/linear_layer_sparsity": 0.8676575387640381, "compression/movement_sparsity/model_sparsity": 0.8378508330806949, "compression_loss": 96.32227325439453, "distillation_loss": 5.669394016265869, "epoch": 3.63, "learning_rate": 3.539025077486616e-05, "loss": 100.9656, "step": 4294, "task_loss": 2.603111743927002 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9046025671092948, "compression/movement_sparsity/importance_threshold": -0.0006681383224362326, "compression/movement_sparsity/linear_layer_sparsity": 0.8677038641553034, "compression/movement_sparsity/model_sparsity": 0.8378955670522567, "compression_loss": 96.34054565429688, "distillation_loss": 3.915592670440674, "epoch": 3.63, "learning_rate": 3.538555461632385e-05, "loss": 100.3994, "step": 4295, "task_loss": 1.5478243827819824 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9047787938496042, "compression/movement_sparsity/importance_threshold": -0.0006669040770789841, "compression/movement_sparsity/linear_layer_sparsity": 0.8677480431963943, "compression/movement_sparsity/model_sparsity": 0.8379382284073754, "compression_loss": 96.35874938964844, "distillation_loss": 3.8440425395965576, "epoch": 3.63, "learning_rate": 3.5380858457781535e-05, "loss": 100.9256, "step": 4296, "task_loss": 2.1421358585357666 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9049548034282157, "compression/movement_sparsity/importance_threshold": -0.0006656713526646818, "compression/movement_sparsity/linear_layer_sparsity": 0.867913324083995, "compression/movement_sparsity/model_sparsity": 0.8380978313880235, "compression_loss": 96.3769760131836, "distillation_loss": 6.172903537750244, "epoch": 3.63, "learning_rate": 3.537616229923922e-05, "loss": 100.3617, "step": 4297, "task_loss": 2.9490866661071777 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9051305959790141, "compression/movement_sparsity/importance_threshold": -0.0006644401482556296, "compression/movement_sparsity/linear_layer_sparsity": 0.8680980294406745, "compression/movement_sparsity/model_sparsity": 0.838276191547481, "compression_loss": 96.39523315429688, "distillation_loss": 3.86124849319458, "epoch": 3.63, "learning_rate": 3.5371466140696915e-05, "loss": 101.3411, "step": 4298, "task_loss": 2.0283093452453613 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9053061716358847, "compression/movement_sparsity/importance_threshold": -0.0006632104629141324, "compression/movement_sparsity/linear_layer_sparsity": 0.8681946867435308, "compression/movement_sparsity/model_sparsity": 0.8383695283746315, "compression_loss": 96.41343688964844, "distillation_loss": 6.689557075500488, "epoch": 3.63, "learning_rate": 3.53667699821546e-05, "loss": 101.0556, "step": 4299, "task_loss": 2.874082088470459 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9054815305327127, "compression/movement_sparsity/importance_threshold": -0.0006619822957024931, "compression/movement_sparsity/linear_layer_sparsity": 0.8682772496802416, "compression/movement_sparsity/model_sparsity": 0.8384492550204729, "compression_loss": 96.43156433105469, "distillation_loss": 3.4758646488189697, "epoch": 3.63, "learning_rate": 3.536207382361229e-05, "loss": 100.6492, "step": 4300, "task_loss": 1.9366663694381714 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9056566728033831, "compression/movement_sparsity/importance_threshold": -0.0006607556456830165, "compression/movement_sparsity/linear_layer_sparsity": 0.8683327566805865, "compression/movement_sparsity/model_sparsity": 0.8385028551845964, "compression_loss": 96.44973754882812, "distillation_loss": 6.7592010498046875, "epoch": 3.64, "learning_rate": 3.5357377665069974e-05, "loss": 101.5295, "step": 4301, "task_loss": 3.4434211254119873 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9058315985817812, "compression/movement_sparsity/importance_threshold": -0.0006595305119180056, "compression/movement_sparsity/linear_layer_sparsity": 0.8684185749150618, "compression/movement_sparsity/model_sparsity": 0.8385857252987098, "compression_loss": 96.46785736083984, "distillation_loss": 6.425994873046875, "epoch": 3.64, "learning_rate": 3.535268150652766e-05, "loss": 100.966, "step": 4302, "task_loss": 5.2162885665893555 }, { "compression/movement_sparsity/importance_regularization_factor": 0.906006308001792, "compression/movement_sparsity/importance_threshold": -0.0006583068934697652, "compression/movement_sparsity/linear_layer_sparsity": 0.868503904258664, "compression/movement_sparsity/model_sparsity": 0.8386681233168556, "compression_loss": 96.48593139648438, "distillation_loss": 2.6008460521698, "epoch": 3.64, "learning_rate": 3.534798534798535e-05, "loss": 100.4335, "step": 4303, "task_loss": 1.2341915369033813 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9061808011973007, "compression/movement_sparsity/importance_threshold": -0.0006570847894005992, "compression/movement_sparsity/linear_layer_sparsity": 0.8686686008620506, "compression/movement_sparsity/model_sparsity": 0.8388271620852498, "compression_loss": 96.50395202636719, "distillation_loss": 4.709802627563477, "epoch": 3.64, "learning_rate": 3.534328918944303e-05, "loss": 100.8767, "step": 4304, "task_loss": 2.802543878555298 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9063550783021925, "compression/movement_sparsity/importance_threshold": -0.0006558641987728115, "compression/movement_sparsity/linear_layer_sparsity": 0.8687918132862319, "compression/movement_sparsity/model_sparsity": 0.8389461417836147, "compression_loss": 96.5219497680664, "distillation_loss": 5.595571517944336, "epoch": 3.64, "learning_rate": 3.5338593030900726e-05, "loss": 100.5052, "step": 4305, "task_loss": 3.3218724727630615 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9065291394503525, "compression/movement_sparsity/importance_threshold": -0.0006546451206487051, "compression/movement_sparsity/linear_layer_sparsity": 0.8689184598706924, "compression/movement_sparsity/model_sparsity": 0.8390684376682885, "compression_loss": 96.5399398803711, "distillation_loss": 2.7710704803466797, "epoch": 3.64, "learning_rate": 3.533389687235841e-05, "loss": 100.2822, "step": 4306, "task_loss": 1.3011189699172974 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9067029847756658, "compression/movement_sparsity/importance_threshold": -0.0006534275540905857, "compression/movement_sparsity/linear_layer_sparsity": 0.8689269379538815, "compression/movement_sparsity/model_sparsity": 0.8390766245032385, "compression_loss": 96.55785369873047, "distillation_loss": 3.722095012664795, "epoch": 3.64, "learning_rate": 3.5329200713816105e-05, "loss": 100.1439, "step": 4307, "task_loss": 2.2931594848632812 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9068766144120175, "compression/movement_sparsity/importance_threshold": -0.0006522114981607563, "compression/movement_sparsity/linear_layer_sparsity": 0.8689741218852166, "compression/movement_sparsity/model_sparsity": 0.8391221875213775, "compression_loss": 96.5757064819336, "distillation_loss": 4.981956481933594, "epoch": 3.64, "learning_rate": 3.5324504555273785e-05, "loss": 100.9473, "step": 4308, "task_loss": 2.4355037212371826 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9070500284932929, "compression/movement_sparsity/importance_threshold": -0.00065099695192152, "compression/movement_sparsity/linear_layer_sparsity": 0.869105752771749, "compression/movement_sparsity/model_sparsity": 0.8392492964820134, "compression_loss": 96.59356689453125, "distillation_loss": 3.977102279663086, "epoch": 3.64, "learning_rate": 3.531980839673147e-05, "loss": 100.9463, "step": 4309, "task_loss": 2.4195563793182373 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9072232271533769, "compression/movement_sparsity/importance_threshold": -0.0006497839144351831, "compression/movement_sparsity/linear_layer_sparsity": 0.8692262703340445, "compression/movement_sparsity/model_sparsity": 0.8393656738952888, "compression_loss": 96.6114501953125, "distillation_loss": 6.106236457824707, "epoch": 3.64, "learning_rate": 3.5315112238189164e-05, "loss": 101.4508, "step": 4310, "task_loss": 3.640273094177246 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9073962105261548, "compression/movement_sparsity/importance_threshold": -0.0006485723847640479, "compression/movement_sparsity/linear_layer_sparsity": 0.8693709820324734, "compression/movement_sparsity/model_sparsity": 0.8395054143016913, "compression_loss": 96.62923431396484, "distillation_loss": 4.5087432861328125, "epoch": 3.64, "learning_rate": 3.531041607964685e-05, "loss": 101.0761, "step": 4311, "task_loss": 2.4355404376983643 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9075689787455119, "compression/movement_sparsity/importance_threshold": -0.0006473623619704183, "compression/movement_sparsity/linear_layer_sparsity": 0.8695083722919739, "compression/movement_sparsity/model_sparsity": 0.8396380847831161, "compression_loss": 96.64703369140625, "distillation_loss": 3.368297576904297, "epoch": 3.64, "learning_rate": 3.5305719921104544e-05, "loss": 100.7962, "step": 4312, "task_loss": 2.0810179710388184 }, { "compression/movement_sparsity/importance_regularization_factor": 0.907741531945333, "compression/movement_sparsity/importance_threshold": -0.0006461538451165991, "compression/movement_sparsity/linear_layer_sparsity": 0.8696201375152248, "compression/movement_sparsity/model_sparsity": 0.8397460105271183, "compression_loss": 96.6648178100586, "distillation_loss": 3.4189343452453613, "epoch": 3.65, "learning_rate": 3.530102376256222e-05, "loss": 99.7394, "step": 4313, "task_loss": 1.8139153718948364 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9079138702595035, "compression/movement_sparsity/importance_threshold": -0.0006449468332648933, "compression/movement_sparsity/linear_layer_sparsity": 0.8697905696432442, "compression/movement_sparsity/model_sparsity": 0.8399105877872297, "compression_loss": 96.68257141113281, "distillation_loss": 4.248610496520996, "epoch": 3.65, "learning_rate": 3.5296327604019916e-05, "loss": 101.1133, "step": 4314, "task_loss": 2.6366236209869385 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9080859938219084, "compression/movement_sparsity/importance_threshold": -0.0006437413254776073, "compression/movement_sparsity/linear_layer_sparsity": 0.8698971478535735, "compression/movement_sparsity/model_sparsity": 0.8400135047081613, "compression_loss": 96.70030975341797, "distillation_loss": 4.740636825561523, "epoch": 3.65, "learning_rate": 3.52916314454776e-05, "loss": 101.2973, "step": 4315, "task_loss": 2.3883512020111084 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9082579027664329, "compression/movement_sparsity/importance_threshold": -0.0006425373208170408, "compression/movement_sparsity/linear_layer_sparsity": 0.8700576590741199, "compression/movement_sparsity/model_sparsity": 0.8401685018744915, "compression_loss": 96.71800231933594, "distillation_loss": 4.079943656921387, "epoch": 3.65, "learning_rate": 3.528693528693529e-05, "loss": 101.5221, "step": 4316, "task_loss": 2.5724759101867676 }, { "compression/movement_sparsity/importance_regularization_factor": 0.908429597226962, "compression/movement_sparsity/importance_threshold": -0.0006413348183455028, "compression/movement_sparsity/linear_layer_sparsity": 0.870175672561212, "compression/movement_sparsity/model_sparsity": 0.8402824612352502, "compression_loss": 96.73567962646484, "distillation_loss": 5.691556930541992, "epoch": 3.65, "learning_rate": 3.5282239128392975e-05, "loss": 101.5329, "step": 4317, "task_loss": 3.272876739501953 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9086010773373813, "compression/movement_sparsity/importance_threshold": -0.0006401338171252921, "compression/movement_sparsity/linear_layer_sparsity": 0.870365994200848, "compression/movement_sparsity/model_sparsity": 0.8404662447410668, "compression_loss": 96.75335693359375, "distillation_loss": 3.6307127475738525, "epoch": 3.65, "learning_rate": 3.527754296985066e-05, "loss": 100.7863, "step": 4318, "task_loss": 2.4787747859954834 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9087723432315753, "compression/movement_sparsity/importance_threshold": -0.0006389343162187169, "compression/movement_sparsity/linear_layer_sparsity": 0.8704239695038936, "compression/movement_sparsity/model_sparsity": 0.8405222284140998, "compression_loss": 96.77095794677734, "distillation_loss": 4.005181312561035, "epoch": 3.65, "learning_rate": 3.5272846811308355e-05, "loss": 100.9218, "step": 4319, "task_loss": 1.1956453323364258 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9089433950434296, "compression/movement_sparsity/importance_threshold": -0.0006377363146880784, "compression/movement_sparsity/linear_layer_sparsity": 0.8705706844624852, "compression/movement_sparsity/model_sparsity": 0.8406639032625159, "compression_loss": 96.78865051269531, "distillation_loss": 4.763053894042969, "epoch": 3.65, "learning_rate": 3.526815065276604e-05, "loss": 100.9945, "step": 4320, "task_loss": 2.3702237606048584 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9091142329068291, "compression/movement_sparsity/importance_threshold": -0.0006365398115956832, "compression/movement_sparsity/linear_layer_sparsity": 0.8706796236580064, "compression/movement_sparsity/model_sparsity": 0.8407691000615348, "compression_loss": 96.80622863769531, "distillation_loss": 4.643828392028809, "epoch": 3.65, "learning_rate": 3.526345449422373e-05, "loss": 101.0123, "step": 4321, "task_loss": 2.353712797164917 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9092848569556591, "compression/movement_sparsity/importance_threshold": -0.0006353448060038326, "compression/movement_sparsity/linear_layer_sparsity": 0.8708399202435354, "compression/movement_sparsity/model_sparsity": 0.8409238899662208, "compression_loss": 96.82382202148438, "distillation_loss": 4.092759132385254, "epoch": 3.65, "learning_rate": 3.5258758335681414e-05, "loss": 102.0031, "step": 4322, "task_loss": 1.0409988164901733 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9094552673238047, "compression/movement_sparsity/importance_threshold": -0.0006341512969748322, "compression/movement_sparsity/linear_layer_sparsity": 0.8708245857639557, "compression/movement_sparsity/model_sparsity": 0.8409090822731891, "compression_loss": 96.84142303466797, "distillation_loss": 4.551883697509766, "epoch": 3.65, "learning_rate": 3.52540621771391e-05, "loss": 100.6823, "step": 4323, "task_loss": 2.5566530227661133 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9096254641451509, "compression/movement_sparsity/importance_threshold": -0.0006329592835709859, "compression/movement_sparsity/linear_layer_sparsity": 0.8710269627370716, "compression/movement_sparsity/model_sparsity": 0.841104506974694, "compression_loss": 96.85894012451172, "distillation_loss": 4.072083473205566, "epoch": 3.65, "learning_rate": 3.524936601859679e-05, "loss": 101.1207, "step": 4324, "task_loss": 2.2178080081939697 }, { "compression/movement_sparsity/importance_regularization_factor": 0.909795447553583, "compression/movement_sparsity/importance_threshold": -0.0006317687648545976, "compression/movement_sparsity/linear_layer_sparsity": 0.871089624237998, "compression/movement_sparsity/model_sparsity": 0.8411650158602944, "compression_loss": 96.87652587890625, "distillation_loss": 4.939325332641602, "epoch": 3.66, "learning_rate": 3.524466986005447e-05, "loss": 101.4992, "step": 4325, "task_loss": 1.893997311592102 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9099652176829861, "compression/movement_sparsity/importance_threshold": -0.0006305797398879704, "compression/movement_sparsity/linear_layer_sparsity": 0.8713035438053853, "compression/movement_sparsity/model_sparsity": 0.8413715866324484, "compression_loss": 96.89399719238281, "distillation_loss": 5.318375587463379, "epoch": 3.66, "learning_rate": 3.5239973701512166e-05, "loss": 101.3714, "step": 4326, "task_loss": 2.883091926574707 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9101347746672453, "compression/movement_sparsity/importance_threshold": -0.0006293922077334089, "compression/movement_sparsity/linear_layer_sparsity": 0.8713783083364622, "compression/movement_sparsity/model_sparsity": 0.8414437827718801, "compression_loss": 96.91153717041016, "distillation_loss": 5.299679756164551, "epoch": 3.66, "learning_rate": 3.523527754296985e-05, "loss": 100.9041, "step": 4327, "task_loss": 2.580761194229126 }, { "compression/movement_sparsity/importance_regularization_factor": 0.910304118640246, "compression/movement_sparsity/importance_threshold": -0.0006282061674532172, "compression/movement_sparsity/linear_layer_sparsity": 0.8714548495685168, "compression/movement_sparsity/model_sparsity": 0.8415176945771453, "compression_loss": 96.92901611328125, "distillation_loss": 4.32697057723999, "epoch": 3.66, "learning_rate": 3.523058138442754e-05, "loss": 100.9766, "step": 4328, "task_loss": 2.1089797019958496 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9104732497358727, "compression/movement_sparsity/importance_threshold": -0.0006270216181096999, "compression/movement_sparsity/linear_layer_sparsity": 0.87157865820108, "compression/movement_sparsity/model_sparsity": 0.8416372500023, "compression_loss": 96.94647216796875, "distillation_loss": 4.445263385772705, "epoch": 3.66, "learning_rate": 3.522588522588523e-05, "loss": 101.4044, "step": 4329, "task_loss": 1.5468918085098267 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9106421680880112, "compression/movement_sparsity/importance_threshold": -0.0006258385587651592, "compression/movement_sparsity/linear_layer_sparsity": 0.8716285846909714, "compression/movement_sparsity/model_sparsity": 0.8416854613636717, "compression_loss": 96.9638671875, "distillation_loss": 4.71537971496582, "epoch": 3.66, "learning_rate": 3.522118906734291e-05, "loss": 101.8231, "step": 4330, "task_loss": 2.995622396469116 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9108108738305466, "compression/movement_sparsity/importance_threshold": -0.0006246569884819, "compression/movement_sparsity/linear_layer_sparsity": 0.87176033481918, "compression/movement_sparsity/model_sparsity": 0.8418126854696657, "compression_loss": 96.98127746582031, "distillation_loss": 4.868564128875732, "epoch": 3.66, "learning_rate": 3.5216492908800604e-05, "loss": 101.7384, "step": 4331, "task_loss": 2.103773832321167 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9109793670973635, "compression/movement_sparsity/importance_threshold": -0.0006234769063222277, "compression/movement_sparsity/linear_layer_sparsity": 0.8718979158653626, "compression/movement_sparsity/model_sparsity": 0.8419455401836631, "compression_loss": 96.99869537353516, "distillation_loss": 4.327425956726074, "epoch": 3.66, "learning_rate": 3.521179675025829e-05, "loss": 101.0718, "step": 4332, "task_loss": 2.1862993240356445 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9111476480223475, "compression/movement_sparsity/importance_threshold": -0.0006222983113484438, "compression/movement_sparsity/linear_layer_sparsity": 0.8720405766069581, "compression/movement_sparsity/model_sparsity": 0.842083300089909, "compression_loss": 97.01604461669922, "distillation_loss": 4.899751663208008, "epoch": 3.66, "learning_rate": 3.520710059171598e-05, "loss": 101.7338, "step": 4333, "task_loss": 2.4079816341400146 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9113157167393836, "compression/movement_sparsity/importance_threshold": -0.0006211212026228539, "compression/movement_sparsity/linear_layer_sparsity": 0.8722104363749309, "compression/movement_sparsity/model_sparsity": 0.8422473246523022, "compression_loss": 97.03340911865234, "distillation_loss": 5.685111045837402, "epoch": 3.66, "learning_rate": 3.5202404433173664e-05, "loss": 101.6567, "step": 4334, "task_loss": 2.3303985595703125 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9114835733823571, "compression/movement_sparsity/importance_threshold": -0.000619945579207761, "compression/movement_sparsity/linear_layer_sparsity": 0.8722908291131319, "compression/movement_sparsity/model_sparsity": 0.842324955652629, "compression_loss": 97.05079650878906, "distillation_loss": 4.43789529800415, "epoch": 3.66, "learning_rate": 3.519770827463135e-05, "loss": 101.0301, "step": 4335, "task_loss": 2.600677728652954 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9116512180851528, "compression/movement_sparsity/importance_threshold": -0.0006187714401654706, "compression/movement_sparsity/linear_layer_sparsity": 0.8722923434824217, "compression/movement_sparsity/model_sparsity": 0.8423264179986749, "compression_loss": 97.06805419921875, "distillation_loss": 4.913663864135742, "epoch": 3.66, "learning_rate": 3.519301211608904e-05, "loss": 101.0199, "step": 4336, "task_loss": 1.9883872270584106 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9118186509816562, "compression/movement_sparsity/importance_threshold": -0.0006175987845582851, "compression/movement_sparsity/linear_layer_sparsity": 0.8723394677929186, "compression/movement_sparsity/model_sparsity": 0.8423719234441349, "compression_loss": 97.08537292480469, "distillation_loss": 3.683157444000244, "epoch": 3.67, "learning_rate": 3.518831595754673e-05, "loss": 100.8494, "step": 4337, "task_loss": 1.5717813968658447 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9119858722057521, "compression/movement_sparsity/importance_threshold": -0.00061642761144851, "compression/movement_sparsity/linear_layer_sparsity": 0.8724589002559594, "compression/movement_sparsity/model_sparsity": 0.842487253034653, "compression_loss": 97.10261535644531, "distillation_loss": 4.721992492675781, "epoch": 3.67, "learning_rate": 3.5183619799004416e-05, "loss": 101.6085, "step": 4338, "task_loss": 3.078033685684204 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9121528818913259, "compression/movement_sparsity/importance_threshold": -0.0006152579198984475, "compression/movement_sparsity/linear_layer_sparsity": 0.8725738492319691, "compression/movement_sparsity/model_sparsity": 0.8425982531597124, "compression_loss": 97.11984252929688, "distillation_loss": 5.826071262359619, "epoch": 3.67, "learning_rate": 3.51789236404621e-05, "loss": 102.0375, "step": 4339, "task_loss": 2.6733133792877197 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9123196801722628, "compression/movement_sparsity/importance_threshold": -0.0006140897089704023, "compression/movement_sparsity/linear_layer_sparsity": 0.872663745531776, "compression/movement_sparsity/model_sparsity": 0.8426850612450675, "compression_loss": 97.13713836669922, "distillation_loss": 6.338521957397461, "epoch": 3.67, "learning_rate": 3.517422748191979e-05, "loss": 101.3408, "step": 4340, "task_loss": 2.889519691467285 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9124862671824476, "compression/movement_sparsity/importance_threshold": -0.0006129229777266793, "compression/movement_sparsity/linear_layer_sparsity": 0.8728442058847771, "compression/movement_sparsity/model_sparsity": 0.842859322229782, "compression_loss": 97.15434265136719, "distillation_loss": 3.1071767807006836, "epoch": 3.67, "learning_rate": 3.516953132337748e-05, "loss": 101.2222, "step": 4341, "task_loss": 1.503021478652954 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9126526430557658, "compression/movement_sparsity/importance_threshold": -0.0006117577252295813, "compression/movement_sparsity/linear_layer_sparsity": 0.8729242647462843, "compression/movement_sparsity/model_sparsity": 0.8429366308231065, "compression_loss": 97.17164611816406, "distillation_loss": 4.599024772644043, "epoch": 3.67, "learning_rate": 3.516483516483517e-05, "loss": 101.8892, "step": 4342, "task_loss": 1.7505542039871216 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9128188079261023, "compression/movement_sparsity/importance_threshold": -0.0006105939505414125, "compression/movement_sparsity/linear_layer_sparsity": 0.8730208028074642, "compression/movement_sparsity/model_sparsity": 0.8430298525048991, "compression_loss": 97.18876647949219, "distillation_loss": 3.0789284706115723, "epoch": 3.67, "learning_rate": 3.5160139006292854e-05, "loss": 101.9085, "step": 4343, "task_loss": 2.671783447265625 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9129847619273423, "compression/movement_sparsity/importance_threshold": -0.0006094316527244766, "compression/movement_sparsity/linear_layer_sparsity": 0.8731325561065475, "compression/movement_sparsity/model_sparsity": 0.8431377667343656, "compression_loss": 97.20599365234375, "distillation_loss": 3.1726179122924805, "epoch": 3.67, "learning_rate": 3.515544284775054e-05, "loss": 101.1661, "step": 4344, "task_loss": 2.655129909515381 }, { "compression/movement_sparsity/importance_regularization_factor": 0.913150505193371, "compression/movement_sparsity/importance_threshold": -0.0006082708308410793, "compression/movement_sparsity/linear_layer_sparsity": 0.873208477281885, "compression/movement_sparsity/model_sparsity": 0.8432110797837694, "compression_loss": 97.22312927246094, "distillation_loss": 3.5077004432678223, "epoch": 3.67, "learning_rate": 3.5150746689208234e-05, "loss": 101.5269, "step": 4345, "task_loss": 1.9847369194030762 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9133160378580736, "compression/movement_sparsity/importance_threshold": -0.0006071114839535218, "compression/movement_sparsity/linear_layer_sparsity": 0.8732438920597635, "compression/movement_sparsity/model_sparsity": 0.8432452779550792, "compression_loss": 97.24030303955078, "distillation_loss": 4.9577836990356445, "epoch": 3.67, "learning_rate": 3.514605053066592e-05, "loss": 101.5755, "step": 4346, "task_loss": 2.5230162143707275 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9134813600553351, "compression/movement_sparsity/importance_threshold": -0.0006059536111241107, "compression/movement_sparsity/linear_layer_sparsity": 0.8734023880959795, "compression/movement_sparsity/model_sparsity": 0.8433983291648602, "compression_loss": 97.2574234008789, "distillation_loss": 4.377408027648926, "epoch": 3.67, "learning_rate": 3.51413543721236e-05, "loss": 100.9028, "step": 4347, "task_loss": 2.3648786544799805 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9136464719190405, "compression/movement_sparsity/importance_threshold": -0.0006047972114151491, "compression/movement_sparsity/linear_layer_sparsity": 0.8735133663241664, "compression/movement_sparsity/model_sparsity": 0.8435054949495001, "compression_loss": 97.2745132446289, "distillation_loss": 4.381985187530518, "epoch": 3.67, "learning_rate": 3.513665821358129e-05, "loss": 101.8533, "step": 4348, "task_loss": 2.47784161567688 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9138113735830754, "compression/movement_sparsity/importance_threshold": -0.000603642283888939, "compression/movement_sparsity/linear_layer_sparsity": 0.873633132663901, "compression/movement_sparsity/model_sparsity": 0.8436211469470204, "compression_loss": 97.29158020019531, "distillation_loss": 4.322001934051514, "epoch": 3.68, "learning_rate": 3.513196205503898e-05, "loss": 101.2804, "step": 4349, "task_loss": 2.2859482765197754 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9139760651813245, "compression/movement_sparsity/importance_threshold": -0.0006024888276077887, "compression/movement_sparsity/linear_layer_sparsity": 0.8737948005287081, "compression/movement_sparsity/model_sparsity": 0.8437772610233228, "compression_loss": 97.30862426757812, "distillation_loss": 4.027581691741943, "epoch": 3.68, "learning_rate": 3.512726589649667e-05, "loss": 101.6502, "step": 4350, "task_loss": 3.063328981399536 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9141405468476731, "compression/movement_sparsity/importance_threshold": -0.0006013368416339978, "compression/movement_sparsity/linear_layer_sparsity": 0.8739063391927739, "compression/movement_sparsity/model_sparsity": 0.843884967991145, "compression_loss": 97.32561492919922, "distillation_loss": 4.0175886154174805, "epoch": 3.68, "learning_rate": 3.512256973795435e-05, "loss": 101.8392, "step": 4351, "task_loss": 1.8267052173614502 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9143048187160064, "compression/movement_sparsity/importance_threshold": -0.0006001863250298728, "compression/movement_sparsity/linear_layer_sparsity": 0.8740634997222146, "compression/movement_sparsity/model_sparsity": 0.844036729572917, "compression_loss": 97.34266662597656, "distillation_loss": 5.036438941955566, "epoch": 3.68, "learning_rate": 3.5117873579412045e-05, "loss": 101.6746, "step": 4352, "task_loss": 2.3102707862854004 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9144688809202095, "compression/movement_sparsity/importance_threshold": -0.0005990372768577166, "compression/movement_sparsity/linear_layer_sparsity": 0.8741300127292875, "compression/movement_sparsity/model_sparsity": 0.844100957653579, "compression_loss": 97.35956573486328, "distillation_loss": 4.079525947570801, "epoch": 3.68, "learning_rate": 3.511317742086973e-05, "loss": 101.1357, "step": 4353, "task_loss": 2.088468074798584 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9146327335941675, "compression/movement_sparsity/importance_threshold": -0.0005978896961798342, "compression/movement_sparsity/linear_layer_sparsity": 0.8742763341903471, "compression/movement_sparsity/model_sparsity": 0.8442422525223138, "compression_loss": 97.37655639648438, "distillation_loss": 6.250157356262207, "epoch": 3.68, "learning_rate": 3.510848126232742e-05, "loss": 101.7325, "step": 4354, "task_loss": 3.244389057159424 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9147963768717657, "compression/movement_sparsity/importance_threshold": -0.0005967435820585275, "compression/movement_sparsity/linear_layer_sparsity": 0.8744145829899173, "compression/movement_sparsity/model_sparsity": 0.8443757520503157, "compression_loss": 97.3934555053711, "distillation_loss": 3.5031533241271973, "epoch": 3.68, "learning_rate": 3.5103785103785104e-05, "loss": 102.3573, "step": 4355, "task_loss": 1.8085384368896484 }, { "compression/movement_sparsity/importance_regularization_factor": 0.914959810886889, "compression/movement_sparsity/importance_threshold": -0.0005955989335561023, "compression/movement_sparsity/linear_layer_sparsity": 0.8745141974863473, "compression/movement_sparsity/model_sparsity": 0.8444719444823433, "compression_loss": 97.4102783203125, "distillation_loss": 5.592926979064941, "epoch": 3.68, "learning_rate": 3.509908894524279e-05, "loss": 102.5116, "step": 4356, "task_loss": 3.1112921237945557 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9151230357734226, "compression/movement_sparsity/importance_threshold": -0.0005944557497348634, "compression/movement_sparsity/linear_layer_sparsity": 0.874623351316886, "compression/movement_sparsity/model_sparsity": 0.8445773485430065, "compression_loss": 97.42716979980469, "distillation_loss": 5.326101303100586, "epoch": 3.68, "learning_rate": 3.509439278670048e-05, "loss": 101.6659, "step": 4357, "task_loss": 2.8373169898986816 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9152860516652519, "compression/movement_sparsity/importance_threshold": -0.000593314029657112, "compression/movement_sparsity/linear_layer_sparsity": 0.8747176476345504, "compression/movement_sparsity/model_sparsity": 0.8446684054920697, "compression_loss": 97.44400787353516, "distillation_loss": 3.7424263954162598, "epoch": 3.68, "learning_rate": 3.508969662815817e-05, "loss": 101.0525, "step": 4358, "task_loss": 2.054537057876587 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9154488586962616, "compression/movement_sparsity/importance_threshold": -0.0005921737723851555, "compression/movement_sparsity/linear_layer_sparsity": 0.8748442345981727, "compression/movement_sparsity/model_sparsity": 0.8447906438040645, "compression_loss": 97.4608383178711, "distillation_loss": 5.851455211639404, "epoch": 3.68, "learning_rate": 3.5085000469615856e-05, "loss": 101.7026, "step": 4359, "task_loss": 4.171334266662598 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9156114570003372, "compression/movement_sparsity/importance_threshold": -0.0005910349769812943, "compression/movement_sparsity/linear_layer_sparsity": 0.875002981041909, "compression/movement_sparsity/model_sparsity": 0.8449439368190973, "compression_loss": 97.4776382446289, "distillation_loss": 4.725736141204834, "epoch": 3.69, "learning_rate": 3.508030431107354e-05, "loss": 101.7928, "step": 4360, "task_loss": 2.2290990352630615 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9157738467113636, "compression/movement_sparsity/importance_threshold": -0.000589897642507835, "compression/movement_sparsity/linear_layer_sparsity": 0.8750622441550592, "compression/movement_sparsity/model_sparsity": 0.8450011640619961, "compression_loss": 97.49446105957031, "distillation_loss": 3.9074432849884033, "epoch": 3.69, "learning_rate": 3.507560815253123e-05, "loss": 101.9479, "step": 4361, "task_loss": 2.1035819053649902 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9159360279632263, "compression/movement_sparsity/importance_threshold": -0.0005887617680270804, "compression/movement_sparsity/linear_layer_sparsity": 0.8752593148735771, "compression/movement_sparsity/model_sparsity": 0.8451914647950726, "compression_loss": 97.51126861572266, "distillation_loss": 4.822877883911133, "epoch": 3.69, "learning_rate": 3.507091199398892e-05, "loss": 102.6587, "step": 4362, "task_loss": 3.1632156372070312 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9160980008898099, "compression/movement_sparsity/importance_threshold": -0.0005876273526013356, "compression/movement_sparsity/linear_layer_sparsity": 0.8753526691819983, "compression/movement_sparsity/model_sparsity": 0.845281612095808, "compression_loss": 97.52800750732422, "distillation_loss": 5.700374126434326, "epoch": 3.69, "learning_rate": 3.506621583544661e-05, "loss": 102.7766, "step": 4363, "task_loss": 2.5474703311920166 }, { "compression/movement_sparsity/importance_regularization_factor": 0.916259765625, "compression/movement_sparsity/importance_threshold": -0.0005864943952929025, "compression/movement_sparsity/linear_layer_sparsity": 0.8753819072410414, "compression/movement_sparsity/model_sparsity": 0.8453098457375762, "compression_loss": 97.54472351074219, "distillation_loss": 2.6779568195343018, "epoch": 3.69, "learning_rate": 3.5061519676904294e-05, "loss": 100.8703, "step": 4364, "task_loss": 0.8352637887001038 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9164213223026816, "compression/movement_sparsity/importance_threshold": -0.0005853628951640868, "compression/movement_sparsity/linear_layer_sparsity": 0.8754710046216166, "compression/movement_sparsity/model_sparsity": 0.845395882349033, "compression_loss": 97.56146240234375, "distillation_loss": 5.734720230102539, "epoch": 3.69, "learning_rate": 3.505682351836198e-05, "loss": 102.056, "step": 4365, "task_loss": 3.1355488300323486 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9165826710567396, "compression/movement_sparsity/importance_threshold": -0.0005842328512771925, "compression/movement_sparsity/linear_layer_sparsity": 0.8754797450364937, "compression/movement_sparsity/model_sparsity": 0.8454043225037704, "compression_loss": 97.57810974121094, "distillation_loss": 4.414103031158447, "epoch": 3.69, "learning_rate": 3.505212735981967e-05, "loss": 101.6781, "step": 4366, "task_loss": 2.1873064041137695 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9167438120210597, "compression/movement_sparsity/importance_threshold": -0.0005831042626945216, "compression/movement_sparsity/linear_layer_sparsity": 0.8755191067138597, "compression/movement_sparsity/model_sparsity": 0.8454423319864282, "compression_loss": 97.59477996826172, "distillation_loss": 4.569825172424316, "epoch": 3.69, "learning_rate": 3.504743120127736e-05, "loss": 101.7669, "step": 4367, "task_loss": 3.6755456924438477 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9169047453295265, "compression/movement_sparsity/importance_threshold": -0.0005819771284783807, "compression/movement_sparsity/linear_layer_sparsity": 0.8755277040387252, "compression/movement_sparsity/model_sparsity": 0.845450633966736, "compression_loss": 97.61133575439453, "distillation_loss": 3.863023281097412, "epoch": 3.69, "learning_rate": 3.504273504273504e-05, "loss": 101.4991, "step": 4368, "task_loss": 2.6442606449127197 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9170654711160253, "compression/movement_sparsity/importance_threshold": -0.0005808514476910729, "compression/movement_sparsity/linear_layer_sparsity": 0.8756684807618341, "compression/movement_sparsity/model_sparsity": 0.8455865745763264, "compression_loss": 97.62783813476562, "distillation_loss": 7.302313327789307, "epoch": 3.69, "learning_rate": 3.503803888419273e-05, "loss": 102.2783, "step": 4369, "task_loss": 4.270925521850586 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9172259895144413, "compression/movement_sparsity/importance_threshold": -0.000579727219394901, "compression/movement_sparsity/linear_layer_sparsity": 0.8757571727207096, "compression/movement_sparsity/model_sparsity": 0.8456722196935662, "compression_loss": 97.64445495605469, "distillation_loss": 5.230157852172852, "epoch": 3.69, "learning_rate": 3.503334272565042e-05, "loss": 102.6004, "step": 4370, "task_loss": 2.3331515789031982 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9173863006586598, "compression/movement_sparsity/importance_threshold": -0.00057860444265217, "compression/movement_sparsity/linear_layer_sparsity": 0.8758812436849608, "compression/movement_sparsity/model_sparsity": 0.8457920284385084, "compression_loss": 97.66097259521484, "distillation_loss": 5.888511657714844, "epoch": 3.69, "learning_rate": 3.5028646567108106e-05, "loss": 101.958, "step": 4371, "task_loss": 2.2261836528778076 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9175464046825655, "compression/movement_sparsity/importance_threshold": -0.0005774831165251846, "compression/movement_sparsity/linear_layer_sparsity": 0.8758866691812351, "compression/movement_sparsity/model_sparsity": 0.8457972675522949, "compression_loss": 97.67750549316406, "distillation_loss": 3.242845058441162, "epoch": 3.7, "learning_rate": 3.502395040856579e-05, "loss": 101.2045, "step": 4372, "task_loss": 1.6460928916931152 }, { "compression/movement_sparsity/importance_regularization_factor": 0.917706301720044, "compression/movement_sparsity/importance_threshold": -0.0005763632400762478, "compression/movement_sparsity/linear_layer_sparsity": 0.8759871422177349, "compression/movement_sparsity/model_sparsity": 0.8458942890308997, "compression_loss": 97.6939697265625, "distillation_loss": 5.436985969543457, "epoch": 3.7, "learning_rate": 3.501925425002348e-05, "loss": 102.1062, "step": 4373, "task_loss": 3.447542190551758 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9178659919049802, "compression/movement_sparsity/importance_threshold": -0.0005752448123676627, "compression/movement_sparsity/linear_layer_sparsity": 0.8759930923773852, "compression/movement_sparsity/model_sparsity": 0.8459000347842612, "compression_loss": 97.71038055419922, "distillation_loss": 4.149409294128418, "epoch": 3.7, "learning_rate": 3.501455809148117e-05, "loss": 102.2838, "step": 4374, "task_loss": 2.66291880607605 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9180254753712593, "compression/movement_sparsity/importance_threshold": -0.0005741278324617348, "compression/movement_sparsity/linear_layer_sparsity": 0.8760632541797546, "compression/movement_sparsity/model_sparsity": 0.8459677863128763, "compression_loss": 97.72676849365234, "distillation_loss": 3.26629376411438, "epoch": 3.7, "learning_rate": 3.500986193293886e-05, "loss": 101.4659, "step": 4375, "task_loss": 3.901474714279175 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9181847522527664, "compression/movement_sparsity/importance_threshold": -0.000573012299420768, "compression/movement_sparsity/linear_layer_sparsity": 0.8760599273369842, "compression/movement_sparsity/model_sparsity": 0.8459645737573895, "compression_loss": 97.74317169189453, "distillation_loss": 3.4512977600097656, "epoch": 3.7, "learning_rate": 3.500516577439655e-05, "loss": 102.0178, "step": 4376, "task_loss": 1.805649995803833 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9183438226833867, "compression/movement_sparsity/importance_threshold": -0.0005718982123070655, "compression/movement_sparsity/linear_layer_sparsity": 0.8761914032093373, "compression/movement_sparsity/model_sparsity": 0.8460915330290601, "compression_loss": 97.75955963134766, "distillation_loss": 4.556659698486328, "epoch": 3.7, "learning_rate": 3.500046961585423e-05, "loss": 101.8656, "step": 4377, "task_loss": 3.28719162940979 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9185026867970052, "compression/movement_sparsity/importance_threshold": -0.000570785570182932, "compression/movement_sparsity/linear_layer_sparsity": 0.8762492831190417, "compression/movement_sparsity/model_sparsity": 0.8461474245858069, "compression_loss": 97.77592468261719, "distillation_loss": 4.052186012268066, "epoch": 3.7, "learning_rate": 3.499577345731192e-05, "loss": 102.3659, "step": 4378, "task_loss": 2.1523499488830566 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9186613447275072, "compression/movement_sparsity/importance_threshold": -0.0005696743721106713, "compression/movement_sparsity/linear_layer_sparsity": 0.8763227717641816, "compression/movement_sparsity/model_sparsity": 0.8462183886699086, "compression_loss": 97.7922592163086, "distillation_loss": 4.916721820831299, "epoch": 3.7, "learning_rate": 3.499107729876961e-05, "loss": 102.4063, "step": 4379, "task_loss": 1.7476212978363037 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9188197966087779, "compression/movement_sparsity/importance_threshold": -0.0005685646171525857, "compression/movement_sparsity/linear_layer_sparsity": 0.876369943771349, "compression/movement_sparsity/model_sparsity": 0.8462639401735118, "compression_loss": 97.80854797363281, "distillation_loss": 4.322973251342773, "epoch": 3.7, "learning_rate": 3.4986381140227296e-05, "loss": 102.0809, "step": 4380, "task_loss": 1.897320032119751 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9189780425747021, "compression/movement_sparsity/importance_threshold": -0.0005674563043709824, "compression/movement_sparsity/linear_layer_sparsity": 0.8764157206509031, "compression/movement_sparsity/model_sparsity": 0.8463081444764271, "compression_loss": 97.82487487792969, "distillation_loss": 4.618863582611084, "epoch": 3.7, "learning_rate": 3.498168498168498e-05, "loss": 102.7742, "step": 4381, "task_loss": 2.2519044876098633 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9191360827591653, "compression/movement_sparsity/importance_threshold": -0.0005663494328281629, "compression/movement_sparsity/linear_layer_sparsity": 0.8765546491280285, "compression/movement_sparsity/model_sparsity": 0.8464423003329693, "compression_loss": 97.84112548828125, "distillation_loss": 5.822296619415283, "epoch": 3.7, "learning_rate": 3.497698882314267e-05, "loss": 103.0421, "step": 4382, "task_loss": 2.226498603820801 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9192939172960525, "compression/movement_sparsity/importance_threshold": -0.0005652440015864318, "compression/movement_sparsity/linear_layer_sparsity": 0.8766760490787293, "compression/movement_sparsity/model_sparsity": 0.8465595298218935, "compression_loss": 97.85743713378906, "distillation_loss": 3.742464303970337, "epoch": 3.7, "learning_rate": 3.497229266460036e-05, "loss": 101.7592, "step": 4383, "task_loss": 2.0629873275756836 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9194515463192489, "compression/movement_sparsity/importance_threshold": -0.0005641400097080931, "compression/movement_sparsity/linear_layer_sparsity": 0.8767725275190711, "compression/movement_sparsity/model_sparsity": 0.8466526939310072, "compression_loss": 97.87367248535156, "distillation_loss": 5.440330505371094, "epoch": 3.71, "learning_rate": 3.496759650605805e-05, "loss": 102.4077, "step": 4384, "task_loss": 2.4062983989715576 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9196089699626395, "compression/movement_sparsity/importance_threshold": -0.0005630374562554516, "compression/movement_sparsity/linear_layer_sparsity": 0.8768474947609977, "compression/movement_sparsity/model_sparsity": 0.8467250858175475, "compression_loss": 97.8899154663086, "distillation_loss": 4.898593902587891, "epoch": 3.71, "learning_rate": 3.4962900347515735e-05, "loss": 102.895, "step": 4385, "task_loss": 2.6820461750030518 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9197661883601096, "compression/movement_sparsity/importance_threshold": -0.0005619363402908101, "compression/movement_sparsity/linear_layer_sparsity": 0.8770033674803337, "compression/movement_sparsity/model_sparsity": 0.8468756038294537, "compression_loss": 97.90615844726562, "distillation_loss": 3.1594228744506836, "epoch": 3.71, "learning_rate": 3.495820418897342e-05, "loss": 102.0073, "step": 4386, "task_loss": 1.6229994297027588 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9199232016455442, "compression/movement_sparsity/importance_threshold": -0.0005608366608764728, "compression/movement_sparsity/linear_layer_sparsity": 0.8771186503330373, "compression/movement_sparsity/model_sparsity": 0.8469869263615153, "compression_loss": 97.92240142822266, "distillation_loss": 4.717376232147217, "epoch": 3.71, "learning_rate": 3.495350803043111e-05, "loss": 102.3721, "step": 4387, "task_loss": 2.2538952827453613 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9200800099528286, "compression/movement_sparsity/importance_threshold": -0.0005597384170747443, "compression/movement_sparsity/linear_layer_sparsity": 0.877221973245602, "compression/movement_sparsity/model_sparsity": 0.847086699814175, "compression_loss": 97.93862915039062, "distillation_loss": 5.628116607666016, "epoch": 3.71, "learning_rate": 3.49488118718888e-05, "loss": 102.9751, "step": 4388, "task_loss": 2.7923123836517334 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9202366134158478, "compression/movement_sparsity/importance_threshold": -0.0005586416079479276, "compression/movement_sparsity/linear_layer_sparsity": 0.877284813609043, "compression/movement_sparsity/model_sparsity": 0.8471473814178122, "compression_loss": 97.95476531982422, "distillation_loss": 4.245274066925049, "epoch": 3.71, "learning_rate": 3.494411571334649e-05, "loss": 101.7774, "step": 4389, "task_loss": 3.085179567337036 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9203930121684869, "compression/movement_sparsity/importance_threshold": -0.0005575462325583293, "compression/movement_sparsity/linear_layer_sparsity": 0.8773571217615869, "compression/movement_sparsity/model_sparsity": 0.8472172055628703, "compression_loss": 97.97088623046875, "distillation_loss": 4.324873924255371, "epoch": 3.71, "learning_rate": 3.493941955480417e-05, "loss": 102.3618, "step": 4390, "task_loss": 2.523642063140869 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9205492063446312, "compression/movement_sparsity/importance_threshold": -0.0005564522899682497, "compression/movement_sparsity/linear_layer_sparsity": 0.877529986419804, "compression/movement_sparsity/model_sparsity": 0.8473841317882838, "compression_loss": 97.98705291748047, "distillation_loss": 4.431741714477539, "epoch": 3.71, "learning_rate": 3.493472339626186e-05, "loss": 102.5287, "step": 4391, "task_loss": 2.067526340484619 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9207051960781659, "compression/movement_sparsity/importance_threshold": -0.0005553597792399938, "compression/movement_sparsity/linear_layer_sparsity": 0.8776264052393076, "compression/movement_sparsity/model_sparsity": 0.8474772383247184, "compression_loss": 98.00316619873047, "distillation_loss": 5.192829132080078, "epoch": 3.71, "learning_rate": 3.4930027237719546e-05, "loss": 102.5017, "step": 4392, "task_loss": 2.803555488586426 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9208609815029759, "compression/movement_sparsity/importance_threshold": -0.0005542686994358679, "compression/movement_sparsity/linear_layer_sparsity": 0.8777513466677961, "compression/movement_sparsity/model_sparsity": 0.8475978876307736, "compression_loss": 98.01921081542969, "distillation_loss": 5.146759033203125, "epoch": 3.71, "learning_rate": 3.492533107917724e-05, "loss": 102.5415, "step": 4393, "task_loss": 2.4057204723358154 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9210165627529465, "compression/movement_sparsity/importance_threshold": -0.0005531790496181724, "compression/movement_sparsity/linear_layer_sparsity": 0.877869240913212, "compression/movement_sparsity/model_sparsity": 0.8477117318461742, "compression_loss": 98.03527069091797, "distillation_loss": 3.9340944290161133, "epoch": 3.71, "learning_rate": 3.492063492063492e-05, "loss": 102.211, "step": 4394, "task_loss": 3.596043109893799 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9211719399619628, "compression/movement_sparsity/importance_threshold": -0.0005520908288492148, "compression/movement_sparsity/linear_layer_sparsity": 0.8779362785836607, "compression/movement_sparsity/model_sparsity": 0.8477764665664111, "compression_loss": 98.05123138427734, "distillation_loss": 4.112605094909668, "epoch": 3.71, "learning_rate": 3.491593876209261e-05, "loss": 101.8275, "step": 4395, "task_loss": 2.5289621353149414 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9213271132639099, "compression/movement_sparsity/importance_threshold": -0.0005510040361912964, "compression/movement_sparsity/linear_layer_sparsity": 0.878077079155105, "compression/movement_sparsity/model_sparsity": 0.8479124302050731, "compression_loss": 98.06721496582031, "distillation_loss": 3.3166773319244385, "epoch": 3.72, "learning_rate": 3.49112426035503e-05, "loss": 102.3046, "step": 4396, "task_loss": 1.3342349529266357 }, { "compression/movement_sparsity/importance_regularization_factor": 0.921482082792673, "compression/movement_sparsity/importance_threshold": -0.0005499186707067236, "compression/movement_sparsity/linear_layer_sparsity": 0.8780705327870729, "compression/movement_sparsity/model_sparsity": 0.8479061087249219, "compression_loss": 98.08320617675781, "distillation_loss": 3.243734836578369, "epoch": 3.72, "learning_rate": 3.4906546445007984e-05, "loss": 102.0052, "step": 4397, "task_loss": 2.555058717727661 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9216368486821372, "compression/movement_sparsity/importance_threshold": -0.0005488347314577969, "compression/movement_sparsity/linear_layer_sparsity": 0.8782110352543263, "compression/movement_sparsity/model_sparsity": 0.848041784500189, "compression_loss": 98.09909057617188, "distillation_loss": 3.1129395961761475, "epoch": 3.72, "learning_rate": 3.490185028646567e-05, "loss": 101.7379, "step": 4398, "task_loss": 2.2047152519226074 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9217914110661877, "compression/movement_sparsity/importance_threshold": -0.0005477522175068245, "compression/movement_sparsity/linear_layer_sparsity": 0.8782021517494376, "compression/movement_sparsity/model_sparsity": 0.8480332061710221, "compression_loss": 98.11495208740234, "distillation_loss": 4.724053382873535, "epoch": 3.72, "learning_rate": 3.489715412792336e-05, "loss": 102.721, "step": 4399, "task_loss": 3.069983720779419 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9219457700787095, "compression/movement_sparsity/importance_threshold": -0.0005466711279161077, "compression/movement_sparsity/linear_layer_sparsity": 0.8783199386773446, "compression/movement_sparsity/model_sparsity": 0.8481469467556005, "compression_loss": 98.13084411621094, "distillation_loss": 4.570070266723633, "epoch": 3.72, "learning_rate": 3.489245796938105e-05, "loss": 102.6567, "step": 4400, "task_loss": 1.9823681116104126 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9220999258535879, "compression/movement_sparsity/importance_threshold": -0.0005455914617479504, "compression/movement_sparsity/linear_layer_sparsity": 0.8784398838795937, "compression/movement_sparsity/model_sparsity": 0.8482627714711578, "compression_loss": 98.1467056274414, "distillation_loss": 4.059892177581787, "epoch": 3.72, "learning_rate": 3.4887761810838736e-05, "loss": 101.9054, "step": 4401, "task_loss": 4.341288089752197 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9222538785247081, "compression/movement_sparsity/importance_threshold": -0.0005445132180646574, "compression/movement_sparsity/linear_layer_sparsity": 0.8785101291511366, "compression/movement_sparsity/model_sparsity": 0.8483306036015235, "compression_loss": 98.16249084472656, "distillation_loss": 6.237392425537109, "epoch": 3.72, "learning_rate": 3.488306565229642e-05, "loss": 102.5395, "step": 4402, "task_loss": 2.472874164581299 }, { "compression/movement_sparsity/importance_regularization_factor": 0.922407628225955, "compression/movement_sparsity/importance_threshold": -0.0005434363959285326, "compression/movement_sparsity/linear_layer_sparsity": 0.8785222798779576, "compression/movement_sparsity/model_sparsity": 0.848342336913498, "compression_loss": 98.17829895019531, "distillation_loss": 3.581615447998047, "epoch": 3.72, "learning_rate": 3.487836949375411e-05, "loss": 102.1503, "step": 4403, "task_loss": 1.8551411628723145 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9225611750912138, "compression/movement_sparsity/importance_threshold": -0.0005423609944018798, "compression/movement_sparsity/linear_layer_sparsity": 0.8786094455433757, "compression/movement_sparsity/model_sparsity": 0.8484265081701562, "compression_loss": 98.19400024414062, "distillation_loss": 5.7543110847473145, "epoch": 3.72, "learning_rate": 3.4873673335211795e-05, "loss": 102.9049, "step": 4404, "task_loss": 3.0706069469451904 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9227145192543698, "compression/movement_sparsity/importance_threshold": -0.0005412870125470039, "compression/movement_sparsity/linear_layer_sparsity": 0.8786684582490056, "compression/movement_sparsity/model_sparsity": 0.8484834936078033, "compression_loss": 98.20977783203125, "distillation_loss": 3.6487467288970947, "epoch": 3.72, "learning_rate": 3.486897717666949e-05, "loss": 101.872, "step": 4405, "task_loss": 0.9840919971466064 }, { "compression/movement_sparsity/importance_regularization_factor": 0.922867660849308, "compression/movement_sparsity/importance_threshold": -0.000540214449426207, "compression/movement_sparsity/linear_layer_sparsity": 0.8787004984874431, "compression/movement_sparsity/model_sparsity": 0.8485144331654833, "compression_loss": 98.22547149658203, "distillation_loss": 4.707341194152832, "epoch": 3.72, "learning_rate": 3.4864281018127175e-05, "loss": 102.4773, "step": 4406, "task_loss": 2.048301935195923 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9230206000099137, "compression/movement_sparsity/importance_threshold": -0.0005391433041017938, "compression/movement_sparsity/linear_layer_sparsity": 0.8787702310197777, "compression/movement_sparsity/model_sparsity": 0.8485817701708097, "compression_loss": 98.24114227294922, "distillation_loss": 3.378244638442993, "epoch": 3.72, "learning_rate": 3.485958485958486e-05, "loss": 102.2148, "step": 4407, "task_loss": 1.2717474699020386 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9231733368700717, "compression/movement_sparsity/importance_threshold": -0.0005380735756360701, "compression/movement_sparsity/linear_layer_sparsity": 0.8788692254594906, "compression/movement_sparsity/model_sparsity": 0.848677363846976, "compression_loss": 98.25680541992188, "distillation_loss": 4.324892997741699, "epoch": 3.73, "learning_rate": 3.485488870104255e-05, "loss": 102.986, "step": 4408, "task_loss": 2.877359628677368 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9233258715636676, "compression/movement_sparsity/importance_threshold": -0.000537005263091337, "compression/movement_sparsity/linear_layer_sparsity": 0.878921345996227, "compression/movement_sparsity/model_sparsity": 0.8487276938829339, "compression_loss": 98.27249908447266, "distillation_loss": 7.238514423370361, "epoch": 3.73, "learning_rate": 3.485019254250024e-05, "loss": 103.388, "step": 4409, "task_loss": 3.8885645866394043 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9234782042245862, "compression/movement_sparsity/importance_threshold": -0.0005359383655299012, "compression/movement_sparsity/linear_layer_sparsity": 0.8790842182019653, "compression/movement_sparsity/model_sparsity": 0.8488849709273516, "compression_loss": 98.28812408447266, "distillation_loss": 6.472870349884033, "epoch": 3.73, "learning_rate": 3.484549638395793e-05, "loss": 103.4889, "step": 4410, "task_loss": 3.249234914779663 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9236303349867128, "compression/movement_sparsity/importance_threshold": -0.0005348728820140638, "compression/movement_sparsity/linear_layer_sparsity": 0.879155548572763, "compression/movement_sparsity/model_sparsity": 0.8489538508804745, "compression_loss": 98.30377197265625, "distillation_loss": 5.607931613922119, "epoch": 3.73, "learning_rate": 3.4840800225415607e-05, "loss": 103.7559, "step": 4411, "task_loss": 3.070171594619751 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9237822639839324, "compression/movement_sparsity/importance_threshold": -0.0005338088116061315, "compression/movement_sparsity/linear_layer_sparsity": 0.8793229758105381, "compression/movement_sparsity/model_sparsity": 0.8491155264775657, "compression_loss": 98.31938171386719, "distillation_loss": 4.288600921630859, "epoch": 3.73, "learning_rate": 3.48361040668733e-05, "loss": 103.0181, "step": 4412, "task_loss": 2.7368335723876953 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9239339913501302, "compression/movement_sparsity/importance_threshold": -0.0005327461533684063, "compression/movement_sparsity/linear_layer_sparsity": 0.8795080388922467, "compression/movement_sparsity/model_sparsity": 0.849294232073097, "compression_loss": 98.3349380493164, "distillation_loss": 3.6252031326293945, "epoch": 3.73, "learning_rate": 3.4831407908330986e-05, "loss": 102.781, "step": 4413, "task_loss": 2.3738551139831543 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9240855172191914, "compression/movement_sparsity/importance_threshold": -0.0005316849063631938, "compression/movement_sparsity/linear_layer_sparsity": 0.8795096009582071, "compression/movement_sparsity/model_sparsity": 0.8492957404772861, "compression_loss": 98.35049438476562, "distillation_loss": 4.822930812835693, "epoch": 3.73, "learning_rate": 3.482671174978868e-05, "loss": 102.8346, "step": 4414, "task_loss": 2.5510809421539307 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9242368417250012, "compression/movement_sparsity/importance_threshold": -0.0005306250696527964, "compression/movement_sparsity/linear_layer_sparsity": 0.8795390298039324, "compression/movement_sparsity/model_sparsity": 0.849324158351627, "compression_loss": 98.36602783203125, "distillation_loss": 4.712005615234375, "epoch": 3.73, "learning_rate": 3.482201559124636e-05, "loss": 103.0312, "step": 4415, "task_loss": 2.530864715576172 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9243879650014446, "compression/movement_sparsity/importance_threshold": -0.0005295666422995195, "compression/movement_sparsity/linear_layer_sparsity": 0.8796494237479051, "compression/movement_sparsity/model_sparsity": 0.8494307599240128, "compression_loss": 98.38152313232422, "distillation_loss": 3.481534004211426, "epoch": 3.73, "learning_rate": 3.481731943270405e-05, "loss": 102.2064, "step": 4416, "task_loss": 1.501293420791626 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9245388871824067, "compression/movement_sparsity/importance_threshold": -0.0005285096233656663, "compression/movement_sparsity/linear_layer_sparsity": 0.8796395028404321, "compression/movement_sparsity/model_sparsity": 0.8494211798302318, "compression_loss": 98.39698791503906, "distillation_loss": 3.4733471870422363, "epoch": 3.73, "learning_rate": 3.481262327416174e-05, "loss": 102.7247, "step": 4417, "task_loss": 2.9259755611419678 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9246896084017728, "compression/movement_sparsity/importance_threshold": -0.0005274540119135406, "compression/movement_sparsity/linear_layer_sparsity": 0.8797395585310647, "compression/movement_sparsity/model_sparsity": 0.8495177983000838, "compression_loss": 98.41239166259766, "distillation_loss": 4.621831893920898, "epoch": 3.73, "learning_rate": 3.4807927115619424e-05, "loss": 103.0856, "step": 4418, "task_loss": 1.6848464012145996 }, { "compression/movement_sparsity/importance_regularization_factor": 0.924840128793428, "compression/movement_sparsity/importance_threshold": -0.0005263998070054463, "compression/movement_sparsity/linear_layer_sparsity": 0.8797769407966031, "compression/movement_sparsity/model_sparsity": 0.8495538963697996, "compression_loss": 98.42784118652344, "distillation_loss": 4.113286972045898, "epoch": 3.73, "learning_rate": 3.480323095707712e-05, "loss": 102.6295, "step": 4419, "task_loss": 1.558012843132019 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9249904484912574, "compression/movement_sparsity/importance_threshold": -0.0005253470077036882, "compression/movement_sparsity/linear_layer_sparsity": 0.8798764360513567, "compression/movement_sparsity/model_sparsity": 0.8496499736564693, "compression_loss": 98.44325256347656, "distillation_loss": 5.690581321716309, "epoch": 3.74, "learning_rate": 3.47985347985348e-05, "loss": 103.4306, "step": 4420, "task_loss": 3.757448196411133 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9251405676291462, "compression/movement_sparsity/importance_threshold": -0.0005242956130705701, "compression/movement_sparsity/linear_layer_sparsity": 0.8800254285259669, "compression/movement_sparsity/model_sparsity": 0.849793847781222, "compression_loss": 98.4586410522461, "distillation_loss": 3.2571897506713867, "epoch": 3.74, "learning_rate": 3.479383863999249e-05, "loss": 102.8102, "step": 4421, "task_loss": 1.1265833377838135 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9252904863409794, "compression/movement_sparsity/importance_threshold": -0.0005232456221683952, "compression/movement_sparsity/linear_layer_sparsity": 0.8802376429373823, "compression/movement_sparsity/model_sparsity": 0.8499987719747575, "compression_loss": 98.47400665283203, "distillation_loss": 4.661984443664551, "epoch": 3.74, "learning_rate": 3.4789142481450177e-05, "loss": 103.2353, "step": 4422, "task_loss": 3.007985830307007 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9254402047606423, "compression/movement_sparsity/importance_threshold": -0.0005221970340594682, "compression/movement_sparsity/linear_layer_sparsity": 0.8802697547208257, "compression/movement_sparsity/model_sparsity": 0.8500297806196522, "compression_loss": 98.48930358886719, "distillation_loss": 3.7220749855041504, "epoch": 3.74, "learning_rate": 3.478444632290786e-05, "loss": 103.1177, "step": 4423, "task_loss": 2.948399305343628 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9255897230220199, "compression/movement_sparsity/importance_threshold": -0.000521149847806093, "compression/movement_sparsity/linear_layer_sparsity": 0.8803362558037309, "compression/movement_sparsity/model_sparsity": 0.8500939971857784, "compression_loss": 98.50460815429688, "distillation_loss": 4.484265327453613, "epoch": 3.74, "learning_rate": 3.477975016436555e-05, "loss": 102.7175, "step": 4424, "task_loss": 3.2189276218414307 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9257390412589975, "compression/movement_sparsity/importance_threshold": -0.0005201040624705734, "compression/movement_sparsity/linear_layer_sparsity": 0.8804072642220024, "compression/movement_sparsity/model_sparsity": 0.8501625662464348, "compression_loss": 98.51992797851562, "distillation_loss": 5.456811904907227, "epoch": 3.74, "learning_rate": 3.4775054005823236e-05, "loss": 103.5835, "step": 4425, "task_loss": 3.655853033065796 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9258881596054601, "compression/movement_sparsity/importance_threshold": -0.0005190596771152134, "compression/movement_sparsity/linear_layer_sparsity": 0.880610142010159, "compression/movement_sparsity/model_sparsity": 0.8503584745584432, "compression_loss": 98.53523254394531, "distillation_loss": 4.379825115203857, "epoch": 3.74, "learning_rate": 3.477035784728093e-05, "loss": 102.8895, "step": 4426, "task_loss": 2.307105541229248 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9260370781952929, "compression/movement_sparsity/importance_threshold": -0.0005180166908023169, "compression/movement_sparsity/linear_layer_sparsity": 0.8807165413579738, "compression/movement_sparsity/model_sparsity": 0.8504612187613378, "compression_loss": 98.55043029785156, "distillation_loss": 4.824334144592285, "epoch": 3.74, "learning_rate": 3.4765661688738615e-05, "loss": 103.2126, "step": 4427, "task_loss": 3.119638204574585 }, { "compression/movement_sparsity/importance_regularization_factor": 0.926185797162381, "compression/movement_sparsity/importance_threshold": -0.0005169751025941877, "compression/movement_sparsity/linear_layer_sparsity": 0.8807723583866773, "compression/movement_sparsity/model_sparsity": 0.8505151183033921, "compression_loss": 98.56568908691406, "distillation_loss": 4.777392387390137, "epoch": 3.74, "learning_rate": 3.47609655301963e-05, "loss": 103.1184, "step": 4428, "task_loss": 2.080717086791992 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9263343166406097, "compression/movement_sparsity/importance_threshold": -0.0005159349115531307, "compression/movement_sparsity/linear_layer_sparsity": 0.8807464829429075, "compression/movement_sparsity/model_sparsity": 0.850490131760718, "compression_loss": 98.58094024658203, "distillation_loss": 4.61302375793457, "epoch": 3.74, "learning_rate": 3.475626937165399e-05, "loss": 102.4243, "step": 4429, "task_loss": 2.870847463607788 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9264826367638639, "compression/movement_sparsity/importance_threshold": -0.0005148961167414489, "compression/movement_sparsity/linear_layer_sparsity": 0.880837631280316, "compression/movement_sparsity/model_sparsity": 0.8505781488723314, "compression_loss": 98.59613800048828, "distillation_loss": 4.479694366455078, "epoch": 3.74, "learning_rate": 3.4751573213111674e-05, "loss": 103.1985, "step": 4430, "task_loss": 2.971008777618408 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9266307576660289, "compression/movement_sparsity/importance_threshold": -0.000513858717221447, "compression/movement_sparsity/linear_layer_sparsity": 0.8809093193761428, "compression/movement_sparsity/model_sparsity": 0.8506473742615283, "compression_loss": 98.61127471923828, "distillation_loss": 5.196210861206055, "epoch": 3.75, "learning_rate": 3.474687705456937e-05, "loss": 103.2381, "step": 4431, "task_loss": 3.1009883880615234 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9267786794809898, "compression/movement_sparsity/importance_threshold": -0.0005128227120554281, "compression/movement_sparsity/linear_layer_sparsity": 0.8809243557515316, "compression/movement_sparsity/model_sparsity": 0.8506618940911651, "compression_loss": 98.62651824951172, "distillation_loss": 4.621634483337402, "epoch": 3.75, "learning_rate": 3.474218089602705e-05, "loss": 102.6086, "step": 4432, "task_loss": 3.353809356689453 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9269264023426318, "compression/movement_sparsity/importance_threshold": -0.000511788100305696, "compression/movement_sparsity/linear_layer_sparsity": 0.8809761781840771, "compression/movement_sparsity/model_sparsity": 0.8507119362637282, "compression_loss": 98.64166259765625, "distillation_loss": 5.1853251457214355, "epoch": 3.75, "learning_rate": 3.473748473748474e-05, "loss": 104.3603, "step": 4433, "task_loss": 3.257661819458008 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9270739263848399, "compression/movement_sparsity/importance_threshold": -0.0005107548810345565, "compression/movement_sparsity/linear_layer_sparsity": 0.8810888257957331, "compression/movement_sparsity/model_sparsity": 0.8508207140833792, "compression_loss": 98.65682220458984, "distillation_loss": 3.831122636795044, "epoch": 3.75, "learning_rate": 3.4732788578942426e-05, "loss": 102.2264, "step": 4434, "task_loss": 2.1604018211364746 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9272212517414994, "compression/movement_sparsity/importance_threshold": -0.0005097230533043116, "compression/movement_sparsity/linear_layer_sparsity": 0.8811399208540528, "compression/movement_sparsity/model_sparsity": 0.8508700538692588, "compression_loss": 98.67196655273438, "distillation_loss": 3.9108176231384277, "epoch": 3.75, "learning_rate": 3.472809242040011e-05, "loss": 102.5798, "step": 4435, "task_loss": 1.7637782096862793 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9273683785464952, "compression/movement_sparsity/importance_threshold": -0.000508692616177267, "compression/movement_sparsity/linear_layer_sparsity": 0.8811419837350538, "compression/movement_sparsity/model_sparsity": 0.8508720458839513, "compression_loss": 98.68704986572266, "distillation_loss": 4.225275993347168, "epoch": 3.75, "learning_rate": 3.4723396261857806e-05, "loss": 103.0752, "step": 4436, "task_loss": 2.394631862640381 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9275153069337128, "compression/movement_sparsity/importance_threshold": -0.0005076635687157258, "compression/movement_sparsity/linear_layer_sparsity": 0.881258387459515, "compression/movement_sparsity/model_sparsity": 0.8509844507823776, "compression_loss": 98.70206451416016, "distillation_loss": 2.862515449523926, "epoch": 3.75, "learning_rate": 3.4718700103315485e-05, "loss": 102.6969, "step": 4437, "task_loss": 1.615430474281311 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9276620370370371, "compression/movement_sparsity/importance_threshold": -0.0005066359099819909, "compression/movement_sparsity/linear_layer_sparsity": 0.8814404575751471, "compression/movement_sparsity/model_sparsity": 0.8511602662294244, "compression_loss": 98.71717834472656, "distillation_loss": 5.4086174964904785, "epoch": 3.75, "learning_rate": 3.471400394477318e-05, "loss": 103.4409, "step": 4438, "task_loss": 2.782451868057251 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9278085689903532, "compression/movement_sparsity/importance_threshold": -0.000505609639038368, "compression/movement_sparsity/linear_layer_sparsity": 0.8814752642204762, "compression/movement_sparsity/model_sparsity": 0.8511938771594086, "compression_loss": 98.73219299316406, "distillation_loss": 3.788233757019043, "epoch": 3.75, "learning_rate": 3.4709307786230865e-05, "loss": 102.6537, "step": 4439, "task_loss": 1.8180707693099976 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9279549029275463, "compression/movement_sparsity/importance_threshold": -0.000504584754947161, "compression/movement_sparsity/linear_layer_sparsity": 0.8815898673956244, "compression/movement_sparsity/model_sparsity": 0.85130454336293, "compression_loss": 98.74718475341797, "distillation_loss": 4.155143737792969, "epoch": 3.75, "learning_rate": 3.470461162768856e-05, "loss": 103.1576, "step": 4440, "task_loss": 1.5821231603622437 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9281010389825016, "compression/movement_sparsity/importance_threshold": -0.0005035612567706729, "compression/movement_sparsity/linear_layer_sparsity": 0.8816934764882124, "compression/movement_sparsity/model_sparsity": 0.8514045931644488, "compression_loss": 98.76216125488281, "distillation_loss": 5.316849231719971, "epoch": 3.75, "learning_rate": 3.469991546914624e-05, "loss": 103.7195, "step": 4441, "task_loss": 2.2870032787323 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9282469772891043, "compression/movement_sparsity/importance_threshold": -0.0005025391435712068, "compression/movement_sparsity/linear_layer_sparsity": 0.8818606890909702, "compression/movement_sparsity/model_sparsity": 0.8515660614998957, "compression_loss": 98.77710723876953, "distillation_loss": 3.302790403366089, "epoch": 3.75, "learning_rate": 3.4695219310603924e-05, "loss": 103.012, "step": 4442, "task_loss": 1.8679653406143188 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9283927179812393, "compression/movement_sparsity/importance_threshold": -0.00050151841441107, "compression/movement_sparsity/linear_layer_sparsity": 0.8820303103755903, "compression/movement_sparsity/model_sparsity": 0.851729855771573, "compression_loss": 98.7920913696289, "distillation_loss": 4.184677600860596, "epoch": 3.76, "learning_rate": 3.469052315206162e-05, "loss": 103.2717, "step": 4443, "task_loss": 1.7728079557418823 }, { "compression/movement_sparsity/importance_regularization_factor": 0.928538261192792, "compression/movement_sparsity/importance_threshold": -0.0005004990683525629, "compression/movement_sparsity/linear_layer_sparsity": 0.8820840645232928, "compression/movement_sparsity/model_sparsity": 0.8517817632989347, "compression_loss": 98.80699157714844, "distillation_loss": 3.965651035308838, "epoch": 3.76, "learning_rate": 3.46858269935193e-05, "loss": 102.8307, "step": 4444, "task_loss": 2.5893805027008057 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9286836070576474, "compression/movement_sparsity/importance_threshold": -0.0004994811044579921, "compression/movement_sparsity/linear_layer_sparsity": 0.8821462252091785, "compression/movement_sparsity/model_sparsity": 0.8518417885740317, "compression_loss": 98.82188415527344, "distillation_loss": 2.74373197555542, "epoch": 3.76, "learning_rate": 3.468113083497699e-05, "loss": 102.1805, "step": 4445, "task_loss": 1.954851746559143 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9288287557096907, "compression/movement_sparsity/importance_threshold": -0.0004984645217896598, "compression/movement_sparsity/linear_layer_sparsity": 0.8821824508304563, "compression/movement_sparsity/model_sparsity": 0.8518767697337756, "compression_loss": 98.83677673339844, "distillation_loss": 5.503054141998291, "epoch": 3.76, "learning_rate": 3.4676434676434676e-05, "loss": 102.7916, "step": 4446, "task_loss": 2.6752309799194336 }, { "compression/movement_sparsity/importance_regularization_factor": 0.928973707282807, "compression/movement_sparsity/importance_threshold": -0.0004974493194098706, "compression/movement_sparsity/linear_layer_sparsity": 0.8822104845485682, "compression/movement_sparsity/model_sparsity": 0.8519038404074285, "compression_loss": 98.85163116455078, "distillation_loss": 5.475777626037598, "epoch": 3.76, "learning_rate": 3.467173851789237e-05, "loss": 102.9896, "step": 4447, "task_loss": 3.9551022052764893 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9291184619108814, "compression/movement_sparsity/importance_threshold": -0.0004964354963809285, "compression/movement_sparsity/linear_layer_sparsity": 0.8822520283486115, "compression/movement_sparsity/model_sparsity": 0.8519439570501367, "compression_loss": 98.86638641357422, "distillation_loss": 4.3310770988464355, "epoch": 3.76, "learning_rate": 3.4667042359350055e-05, "loss": 103.2151, "step": 4448, "task_loss": 2.13908052444458 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9292630197277991, "compression/movement_sparsity/importance_threshold": -0.0004954230517651382, "compression/movement_sparsity/linear_layer_sparsity": 0.8822478071932685, "compression/movement_sparsity/model_sparsity": 0.8519398809044654, "compression_loss": 98.88125610351562, "distillation_loss": 3.1133248805999756, "epoch": 3.76, "learning_rate": 3.4662346200807735e-05, "loss": 102.2601, "step": 4449, "task_loss": 0.6650223135948181 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9294073808674452, "compression/movement_sparsity/importance_threshold": -0.0004944119846248029, "compression/movement_sparsity/linear_layer_sparsity": 0.8823908733565635, "compression/movement_sparsity/model_sparsity": 0.8520780323049283, "compression_loss": 98.89591217041016, "distillation_loss": 4.8558244705200195, "epoch": 3.76, "learning_rate": 3.465765004226543e-05, "loss": 102.9615, "step": 4450, "task_loss": 2.8740787506103516 }, { "compression/movement_sparsity/importance_regularization_factor": 0.929551545463705, "compression/movement_sparsity/importance_threshold": -0.0004934022940222263, "compression/movement_sparsity/linear_layer_sparsity": 0.8823779952555167, "compression/movement_sparsity/model_sparsity": 0.8520655966062702, "compression_loss": 98.91071319580078, "distillation_loss": 6.644176959991455, "epoch": 3.76, "learning_rate": 3.4652953883723114e-05, "loss": 104.3045, "step": 4451, "task_loss": 3.268507957458496 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9296955136504633, "compression/movement_sparsity/importance_threshold": -0.0004923939790197132, "compression/movement_sparsity/linear_layer_sparsity": 0.8823653079411522, "compression/movement_sparsity/model_sparsity": 0.8520533451401848, "compression_loss": 98.92545318603516, "distillation_loss": 4.655245304107666, "epoch": 3.76, "learning_rate": 3.464825772518081e-05, "loss": 103.3562, "step": 4452, "task_loss": 2.356950283050537 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9298392855616057, "compression/movement_sparsity/importance_threshold": -0.0004913870386795667, "compression/movement_sparsity/linear_layer_sparsity": 0.8824989778603503, "compression/movement_sparsity/model_sparsity": 0.8521824230864417, "compression_loss": 98.94017028808594, "distillation_loss": 4.210940361022949, "epoch": 3.76, "learning_rate": 3.4643561566638494e-05, "loss": 102.7465, "step": 4453, "task_loss": 2.505951404571533 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9299828613310169, "compression/movement_sparsity/importance_threshold": -0.0004903814720640916, "compression/movement_sparsity/linear_layer_sparsity": 0.8825174006993477, "compression/movement_sparsity/model_sparsity": 0.8522002130442442, "compression_loss": 98.95484924316406, "distillation_loss": 4.45470666885376, "epoch": 3.76, "learning_rate": 3.463886540809618e-05, "loss": 103.4222, "step": 4454, "task_loss": 2.502286434173584 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9301262410925824, "compression/movement_sparsity/importance_threshold": -0.0004893772782355909, "compression/movement_sparsity/linear_layer_sparsity": 0.8826204732043921, "compression/movement_sparsity/model_sparsity": 0.8522997446916522, "compression_loss": 98.96949005126953, "distillation_loss": 5.719060897827148, "epoch": 3.77, "learning_rate": 3.4634169249553866e-05, "loss": 103.7986, "step": 4455, "task_loss": 2.194148540496826 }, { "compression/movement_sparsity/importance_regularization_factor": 0.930269424980187, "compression/movement_sparsity/importance_threshold": -0.0004883744562563692, "compression/movement_sparsity/linear_layer_sparsity": 0.8827367934596799, "compression/movement_sparsity/model_sparsity": 0.8524120689883279, "compression_loss": 98.9841079711914, "distillation_loss": 4.041849613189697, "epoch": 3.77, "learning_rate": 3.462947309101155e-05, "loss": 102.9149, "step": 4456, "task_loss": 2.3575069904327393 }, { "compression/movement_sparsity/importance_regularization_factor": 0.930412413127716, "compression/movement_sparsity/importance_threshold": -0.00048737300518873154, "compression/movement_sparsity/linear_layer_sparsity": 0.8828104132706637, "compression/movement_sparsity/model_sparsity": 0.8524831597323235, "compression_loss": 98.99867248535156, "distillation_loss": 5.348252296447754, "epoch": 3.77, "learning_rate": 3.4624776932469246e-05, "loss": 103.0928, "step": 4457, "task_loss": 2.9070706367492676 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9305552056690547, "compression/movement_sparsity/importance_threshold": -0.00048637292409498076, "compression/movement_sparsity/linear_layer_sparsity": 0.882902455920645, "compression/movement_sparsity/model_sparsity": 0.8525720404341215, "compression_loss": 99.0132064819336, "distillation_loss": 4.84531307220459, "epoch": 3.77, "learning_rate": 3.4620080773926925e-05, "loss": 103.7254, "step": 4458, "task_loss": 1.7577147483825684 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9306978027380881, "compression/movement_sparsity/importance_threshold": -0.0004853742120374191, "compression/movement_sparsity/linear_layer_sparsity": 0.8828593739029765, "compression/movement_sparsity/model_sparsity": 0.8525304384162958, "compression_loss": 99.02774810791016, "distillation_loss": 4.883671760559082, "epoch": 3.77, "learning_rate": 3.461538461538462e-05, "loss": 102.8411, "step": 4459, "task_loss": 2.4027936458587646 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9308402044687013, "compression/movement_sparsity/importance_threshold": -0.0004843768680783539, "compression/movement_sparsity/linear_layer_sparsity": 0.8829496040794772, "compression/movement_sparsity/model_sparsity": 0.8526175689086531, "compression_loss": 99.04227447509766, "distillation_loss": 3.9085183143615723, "epoch": 3.77, "learning_rate": 3.4610688456842305e-05, "loss": 103.5924, "step": 4460, "task_loss": 3.0719332695007324 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9309824109947794, "compression/movement_sparsity/importance_threshold": -0.00048338089128008733, "compression/movement_sparsity/linear_layer_sparsity": 0.8829862589707898, "compression/movement_sparsity/model_sparsity": 0.8526529645916855, "compression_loss": 99.05672454833984, "distillation_loss": 4.918030261993408, "epoch": 3.77, "learning_rate": 3.460599229829999e-05, "loss": 104.3093, "step": 4461, "task_loss": 2.458925485610962 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9311244224502077, "compression/movement_sparsity/importance_threshold": -0.00048238628070492416, "compression/movement_sparsity/linear_layer_sparsity": 0.8831751377861419, "compression/movement_sparsity/model_sparsity": 0.852835354838671, "compression_loss": 99.07120513916016, "distillation_loss": 4.217590808868408, "epoch": 3.77, "learning_rate": 3.460129613975768e-05, "loss": 103.4413, "step": 4462, "task_loss": 1.9830992221832275 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9312662389688712, "compression/movement_sparsity/importance_threshold": -0.0004813930354151674, "compression/movement_sparsity/linear_layer_sparsity": 0.8832467424127952, "compression/movement_sparsity/model_sparsity": 0.8529044996261173, "compression_loss": 99.085693359375, "distillation_loss": 4.841731071472168, "epoch": 3.77, "learning_rate": 3.4596599981215364e-05, "loss": 103.5519, "step": 4463, "task_loss": 1.8403431177139282 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9314078606846552, "compression/movement_sparsity/importance_threshold": -0.00048040115447312015, "compression/movement_sparsity/linear_layer_sparsity": 0.8833865413541581, "compression/movement_sparsity/model_sparsity": 0.8530394960437725, "compression_loss": 99.10017395019531, "distillation_loss": 5.62965202331543, "epoch": 3.77, "learning_rate": 3.459190382267306e-05, "loss": 104.0416, "step": 4464, "task_loss": 3.3953192234039307 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9315492877314446, "compression/movement_sparsity/importance_threshold": -0.00047941063694108884, "compression/movement_sparsity/linear_layer_sparsity": 0.8834621644253047, "compression/movement_sparsity/model_sparsity": 0.8531125212297814, "compression_loss": 99.11463165283203, "distillation_loss": 3.631840229034424, "epoch": 3.77, "learning_rate": 3.4587207664130743e-05, "loss": 103.4229, "step": 4465, "task_loss": 2.492821216583252 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9316905202431248, "compression/movement_sparsity/importance_threshold": -0.0004784214818813765, "compression/movement_sparsity/linear_layer_sparsity": 0.8835139630095149, "compression/movement_sparsity/model_sparsity": 0.8531625403732729, "compression_loss": 99.12906646728516, "distillation_loss": 4.295794486999512, "epoch": 3.77, "learning_rate": 3.458251150558843e-05, "loss": 103.6775, "step": 4466, "task_loss": 1.5640379190444946 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9318315583535808, "compression/movement_sparsity/importance_threshold": -0.00047743368835628623, "compression/movement_sparsity/linear_layer_sparsity": 0.8835177668189907, "compression/movement_sparsity/model_sparsity": 0.8531662135101914, "compression_loss": 99.14353942871094, "distillation_loss": 3.5572056770324707, "epoch": 3.78, "learning_rate": 3.4577815347046116e-05, "loss": 103.1365, "step": 4467, "task_loss": 1.8369367122650146 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9319724021966976, "compression/movement_sparsity/importance_threshold": -0.00047644725542812447, "compression/movement_sparsity/linear_layer_sparsity": 0.8836164750786805, "compression/movement_sparsity/model_sparsity": 0.8532615308374986, "compression_loss": 99.15791320800781, "distillation_loss": 3.693638324737549, "epoch": 3.78, "learning_rate": 3.45731191885038e-05, "loss": 103.407, "step": 4468, "task_loss": 2.400531530380249 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9321130519063607, "compression/movement_sparsity/importance_threshold": -0.00047546218215919166, "compression/movement_sparsity/linear_layer_sparsity": 0.8836956634759502, "compression/movement_sparsity/model_sparsity": 0.8533379988697102, "compression_loss": 99.17223358154297, "distillation_loss": 4.586825370788574, "epoch": 3.78, "learning_rate": 3.4568423029961496e-05, "loss": 103.826, "step": 4469, "task_loss": 2.677643299102783 }, { "compression/movement_sparsity/importance_regularization_factor": 0.932253507616455, "compression/movement_sparsity/importance_threshold": -0.00047447846761179344, "compression/movement_sparsity/linear_layer_sparsity": 0.8837266901601387, "compression/movement_sparsity/model_sparsity": 0.8533679596918475, "compression_loss": 99.18660736083984, "distillation_loss": 5.2589545249938965, "epoch": 3.78, "learning_rate": 3.456372687141918e-05, "loss": 103.3893, "step": 4470, "task_loss": 2.635115623474121 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9323937694608657, "compression/movement_sparsity/importance_threshold": -0.00047349611084823457, "compression/movement_sparsity/linear_layer_sparsity": 0.8837526013764114, "compression/movement_sparsity/model_sparsity": 0.8533929807781291, "compression_loss": 99.20089721679688, "distillation_loss": 4.475529193878174, "epoch": 3.78, "learning_rate": 3.455903071287687e-05, "loss": 103.1814, "step": 4471, "task_loss": 2.3057191371917725 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9325338375734779, "compression/movement_sparsity/importance_threshold": -0.0004725151109308181, "compression/movement_sparsity/linear_layer_sparsity": 0.8837134901065659, "compression/movement_sparsity/model_sparsity": 0.8533552131007229, "compression_loss": 99.2151870727539, "distillation_loss": 5.528947830200195, "epoch": 3.78, "learning_rate": 3.4554334554334555e-05, "loss": 103.7802, "step": 4472, "task_loss": 3.3463382720947266 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9326737120881767, "compression/movement_sparsity/importance_threshold": -0.0004715354669218479, "compression/movement_sparsity/linear_layer_sparsity": 0.8837653363874466, "compression/movement_sparsity/model_sparsity": 0.8534052783023577, "compression_loss": 99.22945404052734, "distillation_loss": 4.84735107421875, "epoch": 3.78, "learning_rate": 3.454963839579224e-05, "loss": 103.5667, "step": 4473, "task_loss": 1.9125312566757202 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9328133931388474, "compression/movement_sparsity/importance_threshold": -0.0004705571778836296, "compression/movement_sparsity/linear_layer_sparsity": 0.8839293413891102, "compression/movement_sparsity/model_sparsity": 0.8535636492276758, "compression_loss": 99.24368286132812, "distillation_loss": 4.769996166229248, "epoch": 3.78, "learning_rate": 3.4544942237249934e-05, "loss": 103.9858, "step": 4474, "task_loss": 4.012063980102539 }, { "compression/movement_sparsity/importance_regularization_factor": 0.932952880859375, "compression/movement_sparsity/importance_threshold": -0.00046958024287846456, "compression/movement_sparsity/linear_layer_sparsity": 0.8839645415319712, "compression/movement_sparsity/model_sparsity": 0.8535976401373412, "compression_loss": 99.2579116821289, "distillation_loss": 4.299485206604004, "epoch": 3.78, "learning_rate": 3.4540246078707614e-05, "loss": 103.8881, "step": 4475, "task_loss": 1.764823317527771 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9330921753836448, "compression/movement_sparsity/importance_threshold": -0.00046860466096865835, "compression/movement_sparsity/linear_layer_sparsity": 0.883999491267312, "compression/movement_sparsity/model_sparsity": 0.8536313892417551, "compression_loss": 99.27217864990234, "distillation_loss": 4.71946907043457, "epoch": 3.78, "learning_rate": 3.453554992016531e-05, "loss": 103.7803, "step": 4476, "task_loss": 2.5901169776916504 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9332312768455415, "compression/movement_sparsity/importance_threshold": -0.00046763043121651575, "compression/movement_sparsity/linear_layer_sparsity": 0.8841489964811304, "compression/movement_sparsity/model_sparsity": 0.853775758491547, "compression_loss": 99.28636169433594, "distillation_loss": 4.358282089233398, "epoch": 3.78, "learning_rate": 3.453085376162299e-05, "loss": 103.1859, "step": 4477, "task_loss": 2.8471946716308594 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9333701853789509, "compression/movement_sparsity/importance_threshold": -0.00046665755268433806, "compression/movement_sparsity/linear_layer_sparsity": 0.8842540126254995, "compression/movement_sparsity/model_sparsity": 0.8538771670082895, "compression_loss": 99.30052947998047, "distillation_loss": 4.1689982414245605, "epoch": 3.78, "learning_rate": 3.4526157603080686e-05, "loss": 103.225, "step": 4478, "task_loss": 2.4021146297454834 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9335089011177578, "compression/movement_sparsity/importance_threshold": -0.0004656860244344309, "compression/movement_sparsity/linear_layer_sparsity": 0.8843395089074486, "compression/movement_sparsity/model_sparsity": 0.8539597262299363, "compression_loss": 99.31465148925781, "distillation_loss": 3.7200088500976562, "epoch": 3.79, "learning_rate": 3.4521461444538366e-05, "loss": 103.6094, "step": 4479, "task_loss": 2.500925064086914 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9336474241958472, "compression/movement_sparsity/importance_threshold": -0.0004647158455290991, "compression/movement_sparsity/linear_layer_sparsity": 0.8843972218788062, "compression/movement_sparsity/model_sparsity": 0.8540154565831819, "compression_loss": 99.32880401611328, "distillation_loss": 5.050673961639404, "epoch": 3.79, "learning_rate": 3.451676528599606e-05, "loss": 103.7124, "step": 4480, "task_loss": 2.6829442977905273 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9337857547471043, "compression/movement_sparsity/importance_threshold": -0.00046374701503064646, "compression/movement_sparsity/linear_layer_sparsity": 0.8844689576713035, "compression/movement_sparsity/model_sparsity": 0.8540847280305219, "compression_loss": 99.34294891357422, "distillation_loss": 4.245477676391602, "epoch": 3.79, "learning_rate": 3.4512069127453745e-05, "loss": 103.5173, "step": 4481, "task_loss": 2.4073433876037598 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9339238929054146, "compression/movement_sparsity/importance_threshold": -0.0004627795320013752, "compression/movement_sparsity/linear_layer_sparsity": 0.8845436745057098, "compression/movement_sparsity/model_sparsity": 0.8541568781118105, "compression_loss": 99.35710144042969, "distillation_loss": 4.010534286499023, "epoch": 3.79, "learning_rate": 3.450737296891143e-05, "loss": 104.1237, "step": 4482, "task_loss": 2.232236623764038 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9340618388046628, "compression/movement_sparsity/importance_threshold": -0.00046181339550359097, "compression/movement_sparsity/linear_layer_sparsity": 0.8846604955760384, "compression/movement_sparsity/model_sparsity": 0.8542696860189896, "compression_loss": 99.37120056152344, "distillation_loss": 4.53864860534668, "epoch": 3.79, "learning_rate": 3.4502676810369125e-05, "loss": 103.7125, "step": 4483, "task_loss": 1.998191475868225 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9341995925787342, "compression/movement_sparsity/importance_threshold": -0.00046084860459959676, "compression/movement_sparsity/linear_layer_sparsity": 0.8847598358166128, "compression/movement_sparsity/model_sparsity": 0.8543656136166939, "compression_loss": 99.38532257080078, "distillation_loss": 3.650279998779297, "epoch": 3.79, "learning_rate": 3.4497980651826804e-05, "loss": 103.4996, "step": 4484, "task_loss": 1.7187650203704834 }, { "compression/movement_sparsity/importance_regularization_factor": 0.934337154361514, "compression/movement_sparsity/importance_threshold": -0.0004598851583516965, "compression/movement_sparsity/linear_layer_sparsity": 0.8848687630879664, "compression/movement_sparsity/model_sparsity": 0.854470798901177, "compression_loss": 99.39935302734375, "distillation_loss": 4.135272026062012, "epoch": 3.79, "learning_rate": 3.44932844932845e-05, "loss": 103.6663, "step": 4485, "task_loss": 2.5874385833740234 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9344745242868874, "compression/movement_sparsity/importance_threshold": -0.00045892305582219406, "compression/movement_sparsity/linear_layer_sparsity": 0.8849573835018362, "compression/movement_sparsity/model_sparsity": 0.8545563749312021, "compression_loss": 99.41337585449219, "distillation_loss": 4.7910871505737305, "epoch": 3.79, "learning_rate": 3.4488588334742184e-05, "loss": 103.2293, "step": 4486, "task_loss": 3.0408992767333984 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9346117024887393, "compression/movement_sparsity/importance_threshold": -0.0004579622960733951, "compression/movement_sparsity/linear_layer_sparsity": 0.88496649356591, "compression/movement_sparsity/model_sparsity": 0.8545651720365491, "compression_loss": 99.42733001708984, "distillation_loss": 4.788797855377197, "epoch": 3.79, "learning_rate": 3.448389217619987e-05, "loss": 104.1743, "step": 4487, "task_loss": 3.5368692874908447 }, { "compression/movement_sparsity/importance_regularization_factor": 0.934748689100955, "compression/movement_sparsity/importance_threshold": -0.00045700287816760263, "compression/movement_sparsity/linear_layer_sparsity": 0.8851069125639899, "compression/movement_sparsity/model_sparsity": 0.8547007672100656, "compression_loss": 99.44137573242188, "distillation_loss": 3.9559102058410645, "epoch": 3.79, "learning_rate": 3.4479196017657556e-05, "loss": 103.6178, "step": 4488, "task_loss": 3.2537994384765625 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9348854842574196, "compression/movement_sparsity/importance_threshold": -0.0004560448011671197, "compression/movement_sparsity/linear_layer_sparsity": 0.885300012534685, "compression/movement_sparsity/model_sparsity": 0.8548872336027225, "compression_loss": 99.45539855957031, "distillation_loss": 4.702653884887695, "epoch": 3.79, "learning_rate": 3.447449985911524e-05, "loss": 103.5255, "step": 4489, "task_loss": 1.8245919942855835 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9350220880920183, "compression/movement_sparsity/importance_threshold": -0.00045508806413425106, "compression/movement_sparsity/linear_layer_sparsity": 0.8854365800266185, "compression/movement_sparsity/model_sparsity": 0.8550191095811773, "compression_loss": 99.46935272216797, "distillation_loss": 4.110479354858398, "epoch": 3.79, "learning_rate": 3.4469803700572936e-05, "loss": 103.9041, "step": 4490, "task_loss": 2.568847894668579 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9351585007386362, "compression/movement_sparsity/importance_threshold": -0.0004541326661313006, "compression/movement_sparsity/linear_layer_sparsity": 0.8855180220915715, "compression/movement_sparsity/model_sparsity": 0.855097753860654, "compression_loss": 99.48329162597656, "distillation_loss": 4.633420944213867, "epoch": 3.8, "learning_rate": 3.446510754203062e-05, "loss": 104.0522, "step": 4491, "task_loss": 2.521723747253418 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9352947223311583, "compression/movement_sparsity/importance_threshold": -0.000453178606220574, "compression/movement_sparsity/linear_layer_sparsity": 0.8855661957288203, "compression/movement_sparsity/model_sparsity": 0.8551442725852639, "compression_loss": 99.49725341796875, "distillation_loss": 5.031313896179199, "epoch": 3.8, "learning_rate": 3.446041138348831e-05, "loss": 103.5796, "step": 4492, "task_loss": 3.401355266571045 }, { "compression/movement_sparsity/importance_regularization_factor": 0.93543075300347, "compression/movement_sparsity/importance_threshold": -0.00045222588346437165, "compression/movement_sparsity/linear_layer_sparsity": 0.8856698882905819, "compression/movement_sparsity/model_sparsity": 0.8552444029885333, "compression_loss": 99.51112365722656, "distillation_loss": 4.717130661010742, "epoch": 3.8, "learning_rate": 3.4455715224945995e-05, "loss": 103.933, "step": 4493, "task_loss": 1.9160329103469849 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9355665928894563, "compression/movement_sparsity/importance_threshold": -0.00045127449692500005, "compression/movement_sparsity/linear_layer_sparsity": 0.8856541603134701, "compression/movement_sparsity/model_sparsity": 0.8552292153158203, "compression_loss": 99.52499389648438, "distillation_loss": 4.162189483642578, "epoch": 3.8, "learning_rate": 3.445101906640368e-05, "loss": 103.4923, "step": 4494, "task_loss": 2.042285442352295 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9357022421230023, "compression/movement_sparsity/importance_threshold": -0.0004503244456647631, "compression/movement_sparsity/linear_layer_sparsity": 0.8858037251481268, "compression/movement_sparsity/model_sparsity": 0.8553736421382913, "compression_loss": 99.53885650634766, "distillation_loss": 5.512463092803955, "epoch": 3.8, "learning_rate": 3.4446322907861374e-05, "loss": 103.6631, "step": 4495, "task_loss": 3.061004877090454 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9358377008379933, "compression/movement_sparsity/importance_threshold": -0.000449375728745963, "compression/movement_sparsity/linear_layer_sparsity": 0.885813049847218, "compression/movement_sparsity/model_sparsity": 0.8553826465052825, "compression_loss": 99.55267333984375, "distillation_loss": 5.128453731536865, "epoch": 3.8, "learning_rate": 3.444162674931906e-05, "loss": 103.9955, "step": 4496, "task_loss": 2.7442898750305176 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9359729691683143, "compression/movement_sparsity/importance_threshold": -0.0004484283452309062, "compression/movement_sparsity/linear_layer_sparsity": 0.8858931206328928, "compression/movement_sparsity/model_sparsity": 0.8554599666131428, "compression_loss": 99.56643676757812, "distillation_loss": 5.407352447509766, "epoch": 3.8, "learning_rate": 3.443693059077675e-05, "loss": 104.2077, "step": 4497, "task_loss": 2.7361977100372314 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9361080472478505, "compression/movement_sparsity/importance_threshold": -0.00044748229418189577, "compression/movement_sparsity/linear_layer_sparsity": 0.8860048023869703, "compression/movement_sparsity/model_sparsity": 0.8555678117553945, "compression_loss": 99.58023071289062, "distillation_loss": 5.9511518478393555, "epoch": 3.8, "learning_rate": 3.443223443223443e-05, "loss": 104.3643, "step": 4498, "task_loss": 2.9797017574310303 }, { "compression/movement_sparsity/importance_regularization_factor": 0.936242935210487, "compression/movement_sparsity/importance_threshold": -0.0004465375746612347, "compression/movement_sparsity/linear_layer_sparsity": 0.8860839788600725, "compression/movement_sparsity/model_sparsity": 0.8556442682730703, "compression_loss": 99.593994140625, "distillation_loss": 5.060459136962891, "epoch": 3.8, "learning_rate": 3.442753827369212e-05, "loss": 104.4925, "step": 4499, "task_loss": 1.8724145889282227 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9363776331901089, "compression/movement_sparsity/importance_threshold": -0.0004455941857312278, "compression/movement_sparsity/linear_layer_sparsity": 0.8862552695281617, "compression/movement_sparsity/model_sparsity": 0.8558096745797589, "compression_loss": 99.60780334472656, "distillation_loss": 3.715949535369873, "epoch": 3.8, "learning_rate": 3.442284211514981e-05, "loss": 103.7155, "step": 4500, "task_loss": 1.512458086013794 }, { "epoch": 3.8, "eval_accuracy": 0.5622178217821783, "eval_loss": 103.5300521850586, "eval_runtime": 231.5763, "eval_samples_per_second": 109.035, "eval_steps_per_second": 0.855, "step": 4500 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9365121413206015, "compression/movement_sparsity/importance_threshold": -0.00044465212645417897, "compression/movement_sparsity/linear_layer_sparsity": 0.8863109315426858, "compression/movement_sparsity/model_sparsity": 0.8558634244328478, "compression_loss": 99.62156677246094, "distillation_loss": 4.346074104309082, "epoch": 3.8, "learning_rate": 3.441814595660749e-05, "loss": 103.7796, "step": 4501, "task_loss": 1.918161153793335 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9366464597358498, "compression/movement_sparsity/importance_threshold": -0.00044371139589239206, "compression/movement_sparsity/linear_layer_sparsity": 0.8864163650329222, "compression/movement_sparsity/model_sparsity": 0.8559652359583432, "compression_loss": 99.6353530883789, "distillation_loss": 4.872063159942627, "epoch": 3.81, "learning_rate": 3.4413449798065185e-05, "loss": 104.2235, "step": 4502, "task_loss": 3.331578016281128 }, { "compression/movement_sparsity/importance_regularization_factor": 0.936780588569739, "compression/movement_sparsity/importance_threshold": -0.000442771993108171, "compression/movement_sparsity/linear_layer_sparsity": 0.8864189525772991, "compression/movement_sparsity/model_sparsity": 0.8559677346126106, "compression_loss": 99.64900970458984, "distillation_loss": 4.670708179473877, "epoch": 3.81, "learning_rate": 3.440875363952287e-05, "loss": 104.3786, "step": 4503, "task_loss": 2.4652576446533203 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9369145279561542, "compression/movement_sparsity/importance_threshold": -0.00044183391716382057, "compression/movement_sparsity/linear_layer_sparsity": 0.8865227405324017, "compression/movement_sparsity/model_sparsity": 0.8560679571321662, "compression_loss": 99.66278839111328, "distillation_loss": 3.938246965408325, "epoch": 3.81, "learning_rate": 3.4404057480980565e-05, "loss": 103.2839, "step": 4504, "task_loss": 1.8551729917526245 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9370482780289806, "compression/movement_sparsity/importance_threshold": -0.0004408971671216429, "compression/movement_sparsity/linear_layer_sparsity": 0.8867438861453764, "compression/movement_sparsity/model_sparsity": 0.8562815057130118, "compression_loss": 99.67642211914062, "distillation_loss": 4.400326251983643, "epoch": 3.81, "learning_rate": 3.4399361322438244e-05, "loss": 104.1381, "step": 4505, "task_loss": 2.278688907623291 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9371818389221033, "compression/movement_sparsity/importance_threshold": -0.0004399617420439445, "compression/movement_sparsity/linear_layer_sparsity": 0.886746914883956, "compression/movement_sparsity/model_sparsity": 0.8562844304051036, "compression_loss": 99.6900863647461, "distillation_loss": 4.146790504455566, "epoch": 3.81, "learning_rate": 3.439466516389593e-05, "loss": 103.8147, "step": 4506, "task_loss": 2.4132161140441895 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9373152107694074, "compression/movement_sparsity/importance_threshold": -0.0004390276409930276, "compression/movement_sparsity/linear_layer_sparsity": 0.8869162261402175, "compression/movement_sparsity/model_sparsity": 0.8564479252988503, "compression_loss": 99.7037124633789, "distillation_loss": 5.367804050445557, "epoch": 3.81, "learning_rate": 3.4389969005353624e-05, "loss": 104.2516, "step": 4507, "task_loss": 3.248568296432495 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9374483937047781, "compression/movement_sparsity/importance_threshold": -0.00043809486303119597, "compression/movement_sparsity/linear_layer_sparsity": 0.8870868729032544, "compression/movement_sparsity/model_sparsity": 0.856612709820606, "compression_loss": 99.71730041503906, "distillation_loss": 3.691837787628174, "epoch": 3.81, "learning_rate": 3.438527284681131e-05, "loss": 104.44, "step": 4508, "task_loss": 1.766798973083496 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9375813878621005, "compression/movement_sparsity/importance_threshold": -0.0004371634072207545, "compression/movement_sparsity/linear_layer_sparsity": 0.8872165005296239, "compression/movement_sparsity/model_sparsity": 0.8567378843392284, "compression_loss": 99.73088836669922, "distillation_loss": 5.886437892913818, "epoch": 3.81, "learning_rate": 3.4380576688268997e-05, "loss": 104.1922, "step": 4509, "task_loss": 3.457476854324341 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9377141933752599, "compression/movement_sparsity/importance_threshold": -0.000436233272624007, "compression/movement_sparsity/linear_layer_sparsity": 0.8872866861803285, "compression/movement_sparsity/model_sparsity": 0.8568056588969151, "compression_loss": 99.74446868896484, "distillation_loss": 6.532523155212402, "epoch": 3.81, "learning_rate": 3.437588052972668e-05, "loss": 103.7783, "step": 4510, "task_loss": 3.3756752014160156 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9378468103781411, "compression/movement_sparsity/importance_threshold": -0.00043530445830325826, "compression/movement_sparsity/linear_layer_sparsity": 0.8872824054041473, "compression/movement_sparsity/model_sparsity": 0.8568015251785648, "compression_loss": 99.75796508789062, "distillation_loss": 4.524733543395996, "epoch": 3.81, "learning_rate": 3.4371184371184376e-05, "loss": 104.1183, "step": 4511, "task_loss": 2.351926803588867 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9379792390046297, "compression/movement_sparsity/importance_threshold": -0.0004343769633208096, "compression/movement_sparsity/linear_layer_sparsity": 0.8873526029790195, "compression/movement_sparsity/model_sparsity": 0.8568693112507872, "compression_loss": 99.77149963378906, "distillation_loss": 3.206489086151123, "epoch": 3.81, "learning_rate": 3.436648821264206e-05, "loss": 103.5867, "step": 4512, "task_loss": 2.998471260070801 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9381114793886104, "compression/movement_sparsity/importance_threshold": -0.00043345078673896753, "compression/movement_sparsity/linear_layer_sparsity": 0.8874244222406904, "compression/movement_sparsity/model_sparsity": 0.8569386632998778, "compression_loss": 99.78498077392578, "distillation_loss": 4.093575954437256, "epoch": 3.81, "learning_rate": 3.436179205409975e-05, "loss": 103.7978, "step": 4513, "task_loss": 1.60451340675354 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9382435316639686, "compression/movement_sparsity/importance_threshold": -0.00043252592762003505, "compression/movement_sparsity/linear_layer_sparsity": 0.8875451663621711, "compression/movement_sparsity/model_sparsity": 0.8570552594893334, "compression_loss": 99.79851531982422, "distillation_loss": 4.923433303833008, "epoch": 3.82, "learning_rate": 3.4357095895557435e-05, "loss": 104.6617, "step": 4514, "task_loss": 2.323650598526001 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9383753959645893, "compression/movement_sparsity/importance_threshold": -0.00043160238502631693, "compression/movement_sparsity/linear_layer_sparsity": 0.88754199453358, "compression/movement_sparsity/model_sparsity": 0.857052196622812, "compression_loss": 99.81199645996094, "distillation_loss": 3.5554633140563965, "epoch": 3.82, "learning_rate": 3.435239973701512e-05, "loss": 103.4666, "step": 4515, "task_loss": 2.3709278106689453 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9385070724243578, "compression/movement_sparsity/importance_threshold": -0.00043068015802011535, "compression/movement_sparsity/linear_layer_sparsity": 0.8875698493891774, "compression/movement_sparsity/model_sparsity": 0.857079094578428, "compression_loss": 99.825439453125, "distillation_loss": 3.8403117656707764, "epoch": 3.82, "learning_rate": 3.4347703578472815e-05, "loss": 103.2009, "step": 4516, "task_loss": 1.60381281375885 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9386385611771592, "compression/movement_sparsity/importance_threshold": -0.0004297592456637359, "compression/movement_sparsity/linear_layer_sparsity": 0.8876320339233984, "compression/movement_sparsity/model_sparsity": 0.8571391428825965, "compression_loss": 99.83885192871094, "distillation_loss": 4.844111442565918, "epoch": 3.82, "learning_rate": 3.43430074199305e-05, "loss": 104.2777, "step": 4517, "task_loss": 2.194101095199585 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9387698623568784, "compression/movement_sparsity/importance_threshold": -0.00042883964701948257, "compression/movement_sparsity/linear_layer_sparsity": 0.8876756048319399, "compression/movement_sparsity/model_sparsity": 0.8571812169963898, "compression_loss": 99.85223388671875, "distillation_loss": 5.410505294799805, "epoch": 3.82, "learning_rate": 3.433831126138819e-05, "loss": 104.8502, "step": 4518, "task_loss": 2.6061019897460938 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9389009760974009, "compression/movement_sparsity/importance_threshold": -0.00042792136114965917, "compression/movement_sparsity/linear_layer_sparsity": 0.8877567249443666, "compression/movement_sparsity/model_sparsity": 0.8572595503834001, "compression_loss": 99.8655776977539, "distillation_loss": 2.262774705886841, "epoch": 3.82, "learning_rate": 3.4333615102845874e-05, "loss": 103.7813, "step": 4519, "task_loss": 1.1959831714630127 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9390319025326116, "compression/movement_sparsity/importance_threshold": -0.00042700438711656876, "compression/movement_sparsity/linear_layer_sparsity": 0.8878679655042415, "compression/movement_sparsity/model_sparsity": 0.8573669694878273, "compression_loss": 99.87895965576172, "distillation_loss": 5.981935501098633, "epoch": 3.82, "learning_rate": 3.432891894430356e-05, "loss": 104.8295, "step": 4520, "task_loss": 3.0067951679229736 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9391626417963957, "compression/movement_sparsity/importance_threshold": -0.0004260887239825161, "compression/movement_sparsity/linear_layer_sparsity": 0.8879079949349952, "compression/movement_sparsity/model_sparsity": 0.8574056237844896, "compression_loss": 99.89231872558594, "distillation_loss": 4.2976179122924805, "epoch": 3.82, "learning_rate": 3.432422278576125e-05, "loss": 104.2309, "step": 4521, "task_loss": 2.67098069190979 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9392931940226384, "compression/movement_sparsity/importance_threshold": -0.0004251743708098051, "compression/movement_sparsity/linear_layer_sparsity": 0.8879076014374632, "compression/movement_sparsity/model_sparsity": 0.8574052438048084, "compression_loss": 99.9056396484375, "distillation_loss": 5.427758693695068, "epoch": 3.82, "learning_rate": 3.431952662721893e-05, "loss": 104.011, "step": 4522, "task_loss": 2.4527268409729004 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9394235593452248, "compression/movement_sparsity/importance_threshold": -0.0004242613266607397, "compression/movement_sparsity/linear_layer_sparsity": 0.8879777513156649, "compression/movement_sparsity/model_sparsity": 0.8574729838188877, "compression_loss": 99.9189453125, "distillation_loss": 5.155549049377441, "epoch": 3.82, "learning_rate": 3.4314830468676626e-05, "loss": 104.9395, "step": 4523, "task_loss": 2.3619422912597656 }, { "compression/movement_sparsity/importance_regularization_factor": 0.93955373789804, "compression/movement_sparsity/importance_threshold": -0.0004233495905976237, "compression/movement_sparsity/linear_layer_sparsity": 0.8880392561723307, "compression/movement_sparsity/model_sparsity": 0.8575323757945159, "compression_loss": 99.93220520019531, "distillation_loss": 4.723738670349121, "epoch": 3.82, "learning_rate": 3.431013431013431e-05, "loss": 104.3734, "step": 4524, "task_loss": 3.218928575515747 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9396837298149692, "compression/movement_sparsity/importance_threshold": -0.00042243916168276194, "compression/movement_sparsity/linear_layer_sparsity": 0.8880883956671581, "compression/movement_sparsity/model_sparsity": 0.8575798271965253, "compression_loss": 99.94552612304688, "distillation_loss": 4.045338153839111, "epoch": 3.82, "learning_rate": 3.4305438151592e-05, "loss": 104.1328, "step": 4525, "task_loss": 2.146289825439453 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9398135352298975, "compression/movement_sparsity/importance_threshold": -0.00042153003897845657, "compression/movement_sparsity/linear_layer_sparsity": 0.8881129475283204, "compression/movement_sparsity/model_sparsity": 0.8576035356257262, "compression_loss": 99.958740234375, "distillation_loss": 4.084800720214844, "epoch": 3.83, "learning_rate": 3.4300741993049685e-05, "loss": 104.1845, "step": 4526, "task_loss": 2.2716400623321533 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9399431542767102, "compression/movement_sparsity/importance_threshold": -0.00042062222154701234, "compression/movement_sparsity/linear_layer_sparsity": 0.8881531796699238, "compression/movement_sparsity/model_sparsity": 0.857642385669497, "compression_loss": 99.97191619873047, "distillation_loss": 4.517703056335449, "epoch": 3.83, "learning_rate": 3.429604583450737e-05, "loss": 103.9249, "step": 4527, "task_loss": 2.561556339263916 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9400725870892921, "compression/movement_sparsity/importance_threshold": -0.0004197157084507349, "compression/movement_sparsity/linear_layer_sparsity": 0.8881672740360693, "compression/movement_sparsity/model_sparsity": 0.8576559958508061, "compression_loss": 99.98516845703125, "distillation_loss": 2.7468767166137695, "epoch": 3.83, "learning_rate": 3.4291349675965064e-05, "loss": 103.6224, "step": 4528, "task_loss": 2.8449552059173584 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9402018338015287, "compression/movement_sparsity/importance_threshold": -0.0004188104987519264, "compression/movement_sparsity/linear_layer_sparsity": 0.8882513394179022, "compression/movement_sparsity/model_sparsity": 0.8577371733281576, "compression_loss": 99.9983139038086, "distillation_loss": 5.504179000854492, "epoch": 3.83, "learning_rate": 3.428665351742275e-05, "loss": 104.7667, "step": 4529, "task_loss": 2.6728708744049072 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9403308945473048, "compression/movement_sparsity/importance_threshold": -0.0004179065915128916, "compression/movement_sparsity/linear_layer_sparsity": 0.8882881254750589, "compression/movement_sparsity/model_sparsity": 0.8577726956710838, "compression_loss": 100.011474609375, "distillation_loss": 3.343381404876709, "epoch": 3.83, "learning_rate": 3.428195735888044e-05, "loss": 103.4983, "step": 4530, "task_loss": 2.350043773651123 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9404597694605058, "compression/movement_sparsity/importance_threshold": -0.0004170039857959336, "compression/movement_sparsity/linear_layer_sparsity": 0.888313500103788, "compression/movement_sparsity/model_sparsity": 0.8577971986032545, "compression_loss": 100.02455139160156, "distillation_loss": 5.592968940734863, "epoch": 3.83, "learning_rate": 3.427726120033812e-05, "loss": 104.6841, "step": 4531, "task_loss": 3.4871695041656494 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9405884586750167, "compression/movement_sparsity/importance_threshold": -0.00041610268066335793, "compression/movement_sparsity/linear_layer_sparsity": 0.8883812889967978, "compression/movement_sparsity/model_sparsity": 0.8578626587392465, "compression_loss": 100.03768157958984, "distillation_loss": 4.361526012420654, "epoch": 3.83, "learning_rate": 3.427256504179581e-05, "loss": 104.7224, "step": 4532, "task_loss": 1.7217413187026978 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9407169623247228, "compression/movement_sparsity/importance_threshold": -0.0004152026751774668, "compression/movement_sparsity/linear_layer_sparsity": 0.8884393119965139, "compression/movement_sparsity/model_sparsity": 0.8579186884704227, "compression_loss": 100.05084228515625, "distillation_loss": 4.935878276824951, "epoch": 3.83, "learning_rate": 3.42678688832535e-05, "loss": 104.6758, "step": 4533, "task_loss": 1.5802088975906372 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9408452805435091, "compression/movement_sparsity/importance_threshold": -0.000414303968400565, "compression/movement_sparsity/linear_layer_sparsity": 0.8885264895860997, "compression/movement_sparsity/model_sparsity": 0.8580028712416167, "compression_loss": 100.0638656616211, "distillation_loss": 3.853682041168213, "epoch": 3.83, "learning_rate": 3.426317272471119e-05, "loss": 104.6535, "step": 4534, "task_loss": 2.6037731170654297 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9409734134652609, "compression/movement_sparsity/importance_threshold": -0.0004134065593949564, "compression/movement_sparsity/linear_layer_sparsity": 0.8886348921940773, "compression/movement_sparsity/model_sparsity": 0.8581075498865248, "compression_loss": 100.07693481445312, "distillation_loss": 5.714635848999023, "epoch": 3.83, "learning_rate": 3.4258476566168875e-05, "loss": 104.8832, "step": 4535, "task_loss": 3.432044744491577 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9411013612238631, "compression/movement_sparsity/importance_threshold": -0.0004125104472229449, "compression/movement_sparsity/linear_layer_sparsity": 0.8887242757546758, "compression/movement_sparsity/model_sparsity": 0.8581938628468407, "compression_loss": 100.08992767333984, "distillation_loss": 5.693085670471191, "epoch": 3.83, "learning_rate": 3.425378040762656e-05, "loss": 103.8307, "step": 4536, "task_loss": 2.228610038757324 }, { "compression/movement_sparsity/importance_regularization_factor": 0.941229123953201, "compression/movement_sparsity/importance_threshold": -0.0004116156309468353, "compression/movement_sparsity/linear_layer_sparsity": 0.8887757881588627, "compression/movement_sparsity/model_sparsity": 0.8582436056414732, "compression_loss": 100.10295867919922, "distillation_loss": 5.111946105957031, "epoch": 3.83, "learning_rate": 3.424908424908425e-05, "loss": 105.0145, "step": 4537, "task_loss": 1.9150798320770264 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9413567017871597, "compression/movement_sparsity/importance_threshold": -0.0004107221096289306, "compression/movement_sparsity/linear_layer_sparsity": 0.8888842980843491, "compression/movement_sparsity/model_sparsity": 0.8583483879172035, "compression_loss": 100.1159439086914, "distillation_loss": 4.226686954498291, "epoch": 3.84, "learning_rate": 3.424438809054194e-05, "loss": 104.7049, "step": 4538, "task_loss": 2.518601894378662 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9414840948596243, "compression/movement_sparsity/importance_threshold": -0.0004098298823315364, "compression/movement_sparsity/linear_layer_sparsity": 0.8889090884288641, "compression/movement_sparsity/model_sparsity": 0.8583723266371203, "compression_loss": 100.12889099121094, "distillation_loss": 4.587031841278076, "epoch": 3.84, "learning_rate": 3.423969193199962e-05, "loss": 104.2651, "step": 4539, "task_loss": 2.4530508518218994 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9416113033044801, "compression/movement_sparsity/importance_threshold": -0.0004089389481169532, "compression/movement_sparsity/linear_layer_sparsity": 0.8890357946341627, "compression/movement_sparsity/model_sparsity": 0.858494680094473, "compression_loss": 100.14183044433594, "distillation_loss": 4.548207759857178, "epoch": 3.84, "learning_rate": 3.4234995773457314e-05, "loss": 104.1211, "step": 4540, "task_loss": 2.712223529815674 }, { "compression/movement_sparsity/importance_regularization_factor": 0.941738327255612, "compression/movement_sparsity/importance_threshold": -0.0004080493060474883, "compression/movement_sparsity/linear_layer_sparsity": 0.889160485655131, "compression/movement_sparsity/model_sparsity": 0.8586150875952766, "compression_loss": 100.15477752685547, "distillation_loss": 4.381241798400879, "epoch": 3.84, "learning_rate": 3.4230299614915e-05, "loss": 104.1161, "step": 4541, "task_loss": 2.9729394912719727 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9418651668469054, "compression/movement_sparsity/importance_threshold": -0.0004071609551854448, "compression/movement_sparsity/linear_layer_sparsity": 0.8892655614203382, "compression/movement_sparsity/model_sparsity": 0.8587165536846981, "compression_loss": 100.16768646240234, "distillation_loss": 4.094034194946289, "epoch": 3.84, "learning_rate": 3.422560345637269e-05, "loss": 104.069, "step": 4542, "task_loss": 1.6684879064559937 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9419918222122452, "compression/movement_sparsity/importance_threshold": -0.00040627389459312654, "compression/movement_sparsity/linear_layer_sparsity": 0.8893226901074816, "compression/movement_sparsity/model_sparsity": 0.8587717198256898, "compression_loss": 100.18064880371094, "distillation_loss": 5.238960266113281, "epoch": 3.84, "learning_rate": 3.422090729783038e-05, "loss": 104.8357, "step": 4543, "task_loss": 3.900768756866455 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9421182934855167, "compression/movement_sparsity/importance_threshold": -0.0004053881233328366, "compression/movement_sparsity/linear_layer_sparsity": 0.8893198879280871, "compression/movement_sparsity/model_sparsity": 0.858769013909778, "compression_loss": 100.19351959228516, "distillation_loss": 5.322239875793457, "epoch": 3.84, "learning_rate": 3.421621113928806e-05, "loss": 105.3242, "step": 4544, "task_loss": 3.2929630279541016 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9422445808006049, "compression/movement_sparsity/importance_threshold": -0.00040450364046688055, "compression/movement_sparsity/linear_layer_sparsity": 0.8893775174302713, "compression/movement_sparsity/model_sparsity": 0.8588246636612731, "compression_loss": 100.20641326904297, "distillation_loss": 4.867955207824707, "epoch": 3.84, "learning_rate": 3.421151498074575e-05, "loss": 105.1653, "step": 4545, "task_loss": 1.9060171842575073 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9423706842913951, "compression/movement_sparsity/importance_threshold": -0.0004036204450575606, "compression/movement_sparsity/linear_layer_sparsity": 0.8894493247677745, "compression/movement_sparsity/model_sparsity": 0.8588940041958277, "compression_loss": 100.21927642822266, "distillation_loss": 4.625563621520996, "epoch": 3.84, "learning_rate": 3.420681882220344e-05, "loss": 105.438, "step": 4546, "task_loss": 3.3087544441223145 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9424966040917724, "compression/movement_sparsity/importance_threshold": -0.0004027385361671833, "compression/movement_sparsity/linear_layer_sparsity": 0.8895555452530747, "compression/movement_sparsity/model_sparsity": 0.8589965756806855, "compression_loss": 100.2320785522461, "distillation_loss": 7.1838884353637695, "epoch": 3.84, "learning_rate": 3.420212266366113e-05, "loss": 105.4651, "step": 4547, "task_loss": 3.5911319255828857 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9426223403356219, "compression/movement_sparsity/importance_threshold": -0.0004018579128580498, "compression/movement_sparsity/linear_layer_sparsity": 0.889582648886111, "compression/movement_sparsity/model_sparsity": 0.8590227482205465, "compression_loss": 100.24492645263672, "distillation_loss": 5.1949615478515625, "epoch": 3.84, "learning_rate": 3.419742650511881e-05, "loss": 104.4174, "step": 4548, "task_loss": 3.3867945671081543 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9427478931568287, "compression/movement_sparsity/importance_threshold": -0.00040097857419246503, "compression/movement_sparsity/linear_layer_sparsity": 0.8897170819520378, "compression/movement_sparsity/model_sparsity": 0.8591525630970942, "compression_loss": 100.25775146484375, "distillation_loss": 3.4227688312530518, "epoch": 3.84, "learning_rate": 3.4192730346576504e-05, "loss": 104.3519, "step": 4549, "task_loss": 2.013906717300415 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9428732626892781, "compression/movement_sparsity/importance_threshold": -0.0004001005192327345, "compression/movement_sparsity/linear_layer_sparsity": 0.88976488594009, "compression/movement_sparsity/model_sparsity": 0.8591987248710945, "compression_loss": 100.27051544189453, "distillation_loss": 6.102532386779785, "epoch": 3.85, "learning_rate": 3.418803418803419e-05, "loss": 104.9936, "step": 4550, "task_loss": 2.461862564086914 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9429984490668549, "compression/movement_sparsity/importance_threshold": -0.00039922374704116047, "compression/movement_sparsity/linear_layer_sparsity": 0.8898026974756632, "compression/movement_sparsity/model_sparsity": 0.859235237464099, "compression_loss": 100.28326416015625, "distillation_loss": 3.8065192699432373, "epoch": 3.85, "learning_rate": 3.418333802949188e-05, "loss": 104.7014, "step": 4551, "task_loss": 2.004796266555786 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9431234524234448, "compression/movement_sparsity/importance_threshold": -0.00039834825668004677, "compression/movement_sparsity/linear_layer_sparsity": 0.8899704705142999, "compression/movement_sparsity/model_sparsity": 0.8593972469827281, "compression_loss": 100.29605865478516, "distillation_loss": 3.8205912113189697, "epoch": 3.85, "learning_rate": 3.4178641870949563e-05, "loss": 104.9145, "step": 4552, "task_loss": 2.087481737136841 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9432482728929324, "compression/movement_sparsity/importance_threshold": -0.0003974740472116982, "compression/movement_sparsity/linear_layer_sparsity": 0.8899590113892017, "compression/movement_sparsity/model_sparsity": 0.8593861815138296, "compression_loss": 100.3088150024414, "distillation_loss": 3.8403329849243164, "epoch": 3.85, "learning_rate": 3.417394571240725e-05, "loss": 104.9877, "step": 4553, "task_loss": 1.9831923246383667 }, { "compression/movement_sparsity/importance_regularization_factor": 0.943372910609203, "compression/movement_sparsity/importance_threshold": -0.0003966011176984195, "compression/movement_sparsity/linear_layer_sparsity": 0.8899558753331136, "compression/movement_sparsity/model_sparsity": 0.8593831531909156, "compression_loss": 100.32152557373047, "distillation_loss": 5.918248653411865, "epoch": 3.85, "learning_rate": 3.416924955386494e-05, "loss": 104.7306, "step": 4554, "task_loss": 2.389813184738159 }, { "compression/movement_sparsity/importance_regularization_factor": 0.943497365706142, "compression/movement_sparsity/importance_threshold": -0.00039572946720251283, "compression/movement_sparsity/linear_layer_sparsity": 0.8900250355054016, "compression/movement_sparsity/model_sparsity": 0.8594499374985239, "compression_loss": 100.3342056274414, "distillation_loss": 4.823528289794922, "epoch": 3.85, "learning_rate": 3.416455339532263e-05, "loss": 104.8423, "step": 4555, "task_loss": 3.9171040058135986 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9436216383176342, "compression/movement_sparsity/importance_threshold": -0.000394859094786283, "compression/movement_sparsity/linear_layer_sparsity": 0.8901544604209212, "compression/movement_sparsity/model_sparsity": 0.8595749162700379, "compression_loss": 100.34680938720703, "distillation_loss": 4.388155460357666, "epoch": 3.85, "learning_rate": 3.4159857236780316e-05, "loss": 104.1821, "step": 4556, "task_loss": 2.3760643005371094 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9437457285775649, "compression/movement_sparsity/importance_threshold": -0.0003939899995120339, "compression/movement_sparsity/linear_layer_sparsity": 0.8902404217454082, "compression/movement_sparsity/model_sparsity": 0.8596579245585808, "compression_loss": 100.35943603515625, "distillation_loss": 5.23970890045166, "epoch": 3.85, "learning_rate": 3.4155161078238e-05, "loss": 104.6851, "step": 4557, "task_loss": 2.7074804306030273 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9438696366198193, "compression/movement_sparsity/importance_threshold": -0.00039312218044207023, "compression/movement_sparsity/linear_layer_sparsity": 0.8902626841663843, "compression/movement_sparsity/model_sparsity": 0.8596794221969092, "compression_loss": 100.3719711303711, "distillation_loss": 3.5079846382141113, "epoch": 3.85, "learning_rate": 3.415046491969569e-05, "loss": 105.0793, "step": 4558, "task_loss": 2.076284885406494 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9439933625782824, "compression/movement_sparsity/importance_threshold": -0.00039225563663869423, "compression/movement_sparsity/linear_layer_sparsity": 0.8903582205974828, "compression/movement_sparsity/model_sparsity": 0.859771676657695, "compression_loss": 100.38452911376953, "distillation_loss": 5.416142463684082, "epoch": 3.85, "learning_rate": 3.414576876115338e-05, "loss": 105.6541, "step": 4559, "task_loss": 2.8598122596740723 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9441169065868393, "compression/movement_sparsity/importance_threshold": -0.00039139036716421237, "compression/movement_sparsity/linear_layer_sparsity": 0.8903723507361313, "compression/movement_sparsity/model_sparsity": 0.8597853213826115, "compression_loss": 100.39707946777344, "distillation_loss": 5.128992080688477, "epoch": 3.85, "learning_rate": 3.414107260261107e-05, "loss": 105.5694, "step": 4560, "task_loss": 2.923676013946533 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9442402687793753, "compression/movement_sparsity/importance_threshold": -0.0003905263710809277, "compression/movement_sparsity/linear_layer_sparsity": 0.8904547348103276, "compression/movement_sparsity/model_sparsity": 0.859864875310416, "compression_loss": 100.4096450805664, "distillation_loss": 2.975842237472534, "epoch": 3.85, "learning_rate": 3.4136376444068754e-05, "loss": 103.6428, "step": 4561, "task_loss": 1.151943325996399 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9443634492897756, "compression/movement_sparsity/importance_threshold": -0.0003896636474511415, "compression/movement_sparsity/linear_layer_sparsity": 0.8905473021236847, "compression/movement_sparsity/model_sparsity": 0.8599542626517891, "compression_loss": 100.422119140625, "distillation_loss": 5.097267150878906, "epoch": 3.86, "learning_rate": 3.413168028552644e-05, "loss": 104.6303, "step": 4562, "task_loss": 3.3521292209625244 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9444864482519251, "compression/movement_sparsity/importance_threshold": -0.00038880219533716197, "compression/movement_sparsity/linear_layer_sparsity": 0.8905874150236117, "compression/movement_sparsity/model_sparsity": 0.8599929975502019, "compression_loss": 100.4345703125, "distillation_loss": 4.044228553771973, "epoch": 3.86, "learning_rate": 3.412698412698413e-05, "loss": 103.9783, "step": 4563, "task_loss": 1.4565430879592896 }, { "compression/movement_sparsity/importance_regularization_factor": 0.944609265799709, "compression/movement_sparsity/importance_threshold": -0.00038794201380129047, "compression/movement_sparsity/linear_layer_sparsity": 0.8907103651161051, "compression/movement_sparsity/model_sparsity": 0.8601117239287794, "compression_loss": 100.44706726074219, "distillation_loss": 5.892083644866943, "epoch": 3.86, "learning_rate": 3.412228796844182e-05, "loss": 105.0898, "step": 4564, "task_loss": 3.5995705127716064 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9447319020670126, "compression/movement_sparsity/importance_threshold": -0.00038708310190583173, "compression/movement_sparsity/linear_layer_sparsity": 0.8907812185201974, "compression/movement_sparsity/model_sparsity": 0.8601801433004705, "compression_loss": 100.45948028564453, "distillation_loss": 4.049729347229004, "epoch": 3.86, "learning_rate": 3.41175918098995e-05, "loss": 105.1806, "step": 4565, "task_loss": 0.9365973472595215 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9448543571877209, "compression/movement_sparsity/importance_threshold": -0.00038622545871308967, "compression/movement_sparsity/linear_layer_sparsity": 0.890756249313168, "compression/movement_sparsity/model_sparsity": 0.8601560318625168, "compression_loss": 100.4719009399414, "distillation_loss": 5.677170276641846, "epoch": 3.86, "learning_rate": 3.411289565135719e-05, "loss": 104.8938, "step": 4566, "task_loss": 3.0070064067840576 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9449766312957192, "compression/movement_sparsity/importance_threshold": -0.0003853690832853682, "compression/movement_sparsity/linear_layer_sparsity": 0.8908949512311083, "compression/movement_sparsity/model_sparsity": 0.8602899689428789, "compression_loss": 100.48423767089844, "distillation_loss": 4.856156349182129, "epoch": 3.86, "learning_rate": 3.410819949281488e-05, "loss": 105.0563, "step": 4567, "task_loss": 3.40464448928833 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9450987245248924, "compression/movement_sparsity/importance_threshold": -0.00038451397468497114, "compression/movement_sparsity/linear_layer_sparsity": 0.8909962589593426, "compression/movement_sparsity/model_sparsity": 0.8603877964389893, "compression_loss": 100.49665069580078, "distillation_loss": 4.184131622314453, "epoch": 3.86, "learning_rate": 3.4103503334272565e-05, "loss": 104.89, "step": 4568, "task_loss": 2.2326037883758545 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9452206370091258, "compression/movement_sparsity/importance_threshold": -0.0003836601319742025, "compression/movement_sparsity/linear_layer_sparsity": 0.8910834365489284, "compression/movement_sparsity/model_sparsity": 0.8604719792101833, "compression_loss": 100.5090103149414, "distillation_loss": 3.6449947357177734, "epoch": 3.86, "learning_rate": 3.409880717573025e-05, "loss": 104.3568, "step": 4569, "task_loss": 0.9449997544288635 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9453423688823045, "compression/movement_sparsity/importance_threshold": -0.00038280755421536694, "compression/movement_sparsity/linear_layer_sparsity": 0.8911646043580257, "compression/movement_sparsity/model_sparsity": 0.8605503586553367, "compression_loss": 100.52131652832031, "distillation_loss": 4.239992141723633, "epoch": 3.86, "learning_rate": 3.409411101718794e-05, "loss": 104.8974, "step": 4570, "task_loss": 2.364339590072632 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9454639202783137, "compression/movement_sparsity/importance_threshold": -0.00038195624047076757, "compression/movement_sparsity/linear_layer_sparsity": 0.8912603673483094, "compression/movement_sparsity/model_sparsity": 0.8606428318923026, "compression_loss": 100.53364562988281, "distillation_loss": 5.223026752471924, "epoch": 3.86, "learning_rate": 3.408941485864563e-05, "loss": 105.3514, "step": 4571, "task_loss": 3.9417033195495605 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9455852913310384, "compression/movement_sparsity/importance_threshold": -0.0003811061898027091, "compression/movement_sparsity/linear_layer_sparsity": 0.8914089663253875, "compression/movement_sparsity/model_sparsity": 0.8607863260373741, "compression_loss": 100.54591369628906, "distillation_loss": 5.254037380218506, "epoch": 3.86, "learning_rate": 3.408471870010332e-05, "loss": 105.6839, "step": 4572, "task_loss": 2.153132677078247 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9457064821743638, "compression/movement_sparsity/importance_threshold": -0.0003802574012734955, "compression/movement_sparsity/linear_layer_sparsity": 0.8913756740493481, "compression/movement_sparsity/model_sparsity": 0.8607541774534359, "compression_loss": 100.55821228027344, "distillation_loss": 4.223822593688965, "epoch": 3.87, "learning_rate": 3.408002254156101e-05, "loss": 105.297, "step": 4573, "task_loss": 2.9718286991119385 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9458274929421752, "compression/movement_sparsity/importance_threshold": -0.00037940987394542974, "compression/movement_sparsity/linear_layer_sparsity": 0.8914539442857099, "compression/movement_sparsity/model_sparsity": 0.8608297588663912, "compression_loss": 100.5704574584961, "distillation_loss": 3.7230608463287354, "epoch": 3.87, "learning_rate": 3.407532638301869e-05, "loss": 105.0828, "step": 4574, "task_loss": 1.6224454641342163 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9459483237683575, "compression/movement_sparsity/importance_threshold": -0.0003785636068808166, "compression/movement_sparsity/linear_layer_sparsity": 0.8914448938424743, "compression/movement_sparsity/model_sparsity": 0.8608210193337231, "compression_loss": 100.582763671875, "distillation_loss": 4.792668342590332, "epoch": 3.87, "learning_rate": 3.407063022447638e-05, "loss": 104.9859, "step": 4575, "task_loss": 2.8506128787994385 }, { "compression/movement_sparsity/importance_regularization_factor": 0.946068974786796, "compression/movement_sparsity/importance_threshold": -0.00037771859914196, "compression/movement_sparsity/linear_layer_sparsity": 0.8914831823447531, "compression/movement_sparsity/model_sparsity": 0.8608579925081594, "compression_loss": 100.59502410888672, "distillation_loss": 4.605414390563965, "epoch": 3.87, "learning_rate": 3.406593406593407e-05, "loss": 105.2027, "step": 4576, "task_loss": 2.140082836151123 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9461894461313758, "compression/movement_sparsity/importance_threshold": -0.0003768748497911629, "compression/movement_sparsity/linear_layer_sparsity": 0.8915669376982274, "compression/movement_sparsity/model_sparsity": 0.8609388706075802, "compression_loss": 100.60725402832031, "distillation_loss": 4.68811559677124, "epoch": 3.87, "learning_rate": 3.4061237907391756e-05, "loss": 104.7396, "step": 4577, "task_loss": 3.1756834983825684 }, { "compression/movement_sparsity/importance_regularization_factor": 0.946309737935982, "compression/movement_sparsity/importance_threshold": -0.00037603235789073104, "compression/movement_sparsity/linear_layer_sparsity": 0.891624483731238, "compression/movement_sparsity/model_sparsity": 0.8609944397573247, "compression_loss": 100.61952209472656, "distillation_loss": 4.796651840209961, "epoch": 3.87, "learning_rate": 3.405654174884944e-05, "loss": 105.1258, "step": 4578, "task_loss": 2.046924352645874 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9464298503344998, "compression/movement_sparsity/importance_threshold": -0.0003751911225029674, "compression/movement_sparsity/linear_layer_sparsity": 0.8915847404805077, "compression/movement_sparsity/model_sparsity": 0.8609560618095216, "compression_loss": 100.63166046142578, "distillation_loss": 3.3481109142303467, "epoch": 3.87, "learning_rate": 3.405184559030713e-05, "loss": 105.0531, "step": 4579, "task_loss": 1.4707443714141846 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9465497834608143, "compression/movement_sparsity/importance_threshold": -0.00037435114269017584, "compression/movement_sparsity/linear_layer_sparsity": 0.8916884330422692, "compression/movement_sparsity/model_sparsity": 0.8610561922127908, "compression_loss": 100.643798828125, "distillation_loss": 4.046797275543213, "epoch": 3.87, "learning_rate": 3.404714943176482e-05, "loss": 105.0061, "step": 4580, "task_loss": 1.945582389831543 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9466695374488105, "compression/movement_sparsity/importance_threshold": -0.00037351241751466206, "compression/movement_sparsity/linear_layer_sparsity": 0.8917565438878052, "compression/movement_sparsity/model_sparsity": 0.8611219632412492, "compression_loss": 100.65595245361328, "distillation_loss": 6.010524272918701, "epoch": 3.87, "learning_rate": 3.404245327322251e-05, "loss": 105.0814, "step": 4581, "task_loss": 3.0034704208374023 }, { "compression/movement_sparsity/importance_regularization_factor": 0.946789112432374, "compression/movement_sparsity/importance_threshold": -0.00037267494603872645, "compression/movement_sparsity/linear_layer_sparsity": 0.8919112361145453, "compression/movement_sparsity/model_sparsity": 0.8612713413141119, "compression_loss": 100.66813659667969, "distillation_loss": 4.315887451171875, "epoch": 3.87, "learning_rate": 3.4037757114680194e-05, "loss": 105.2493, "step": 4582, "task_loss": 2.8687756061553955 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9469085085453894, "compression/movement_sparsity/importance_threshold": -0.0003718387273246764, "compression/movement_sparsity/linear_layer_sparsity": 0.891950836275264, "compression/movement_sparsity/model_sparsity": 0.8613095810874855, "compression_loss": 100.68025207519531, "distillation_loss": 5.229598522186279, "epoch": 3.87, "learning_rate": 3.403306095613788e-05, "loss": 105.2484, "step": 4583, "task_loss": 2.2565793991088867 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9470277259217422, "compression/movement_sparsity/importance_threshold": -0.00037100376043481403, "compression/movement_sparsity/linear_layer_sparsity": 0.8920489125540689, "compression/movement_sparsity/model_sparsity": 0.8614042881443956, "compression_loss": 100.69231414794922, "distillation_loss": 3.4245283603668213, "epoch": 3.87, "learning_rate": 3.402836479759557e-05, "loss": 105.0676, "step": 4584, "task_loss": 2.0414838790893555 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9471467646953173, "compression/movement_sparsity/importance_threshold": -0.00037017004443144503, "compression/movement_sparsity/linear_layer_sparsity": 0.8921379980104764, "compression/movement_sparsity/model_sparsity": 0.8614903132413166, "compression_loss": 100.70439147949219, "distillation_loss": 4.959332466125488, "epoch": 3.88, "learning_rate": 3.402366863905326e-05, "loss": 105.5242, "step": 4585, "task_loss": 3.6372737884521484 }, { "compression/movement_sparsity/importance_regularization_factor": 0.947265625, "compression/movement_sparsity/importance_threshold": -0.00036933757837687153, "compression/movement_sparsity/linear_layer_sparsity": 0.8922087560212277, "compression/movement_sparsity/model_sparsity": 0.8615586404967214, "compression_loss": 100.71643829345703, "distillation_loss": 5.04964542388916, "epoch": 3.88, "learning_rate": 3.401897248051094e-05, "loss": 104.8315, "step": 4586, "task_loss": 2.7517597675323486 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9473843069696755, "compression/movement_sparsity/importance_threshold": -0.00036850636133339745, "compression/movement_sparsity/linear_layer_sparsity": 0.892278595871071, "compression/movement_sparsity/model_sparsity": 0.8616260811328701, "compression_loss": 100.72847747802734, "distillation_loss": 3.693368911743164, "epoch": 3.88, "learning_rate": 3.401427632196863e-05, "loss": 105.0103, "step": 4587, "task_loss": 1.712372899055481 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9475028107382286, "compression/movement_sparsity/importance_threshold": -0.00036767639236332927, "compression/movement_sparsity/linear_layer_sparsity": 0.8922988908043872, "compression/movement_sparsity/model_sparsity": 0.8616456788727923, "compression_loss": 100.74050903320312, "distillation_loss": 5.214860916137695, "epoch": 3.88, "learning_rate": 3.400958016342632e-05, "loss": 105.4842, "step": 4588, "task_loss": 2.8640553951263428 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9476211364395448, "compression/movement_sparsity/importance_threshold": -0.0003668476705289683, "compression/movement_sparsity/linear_layer_sparsity": 0.8923699588434969, "compression/movement_sparsity/model_sparsity": 0.8617143055061278, "compression_loss": 100.75250244140625, "distillation_loss": 5.294858932495117, "epoch": 3.88, "learning_rate": 3.4004884004884005e-05, "loss": 106.757, "step": 4589, "task_loss": 3.3166391849517822 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9477392842075092, "compression/movement_sparsity/importance_threshold": -0.0003660201948926193, "compression/movement_sparsity/linear_layer_sparsity": 0.8924523190693578, "compression/movement_sparsity/model_sparsity": 0.8617938364048607, "compression_loss": 100.76454162597656, "distillation_loss": 4.4140119552612305, "epoch": 3.88, "learning_rate": 3.40001878463417e-05, "loss": 105.0569, "step": 4590, "task_loss": 2.84916615486145 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9478572541760069, "compression/movement_sparsity/importance_threshold": -0.00036519396451658616, "compression/movement_sparsity/linear_layer_sparsity": 0.8924921457892616, "compression/movement_sparsity/model_sparsity": 0.8618322949544145, "compression_loss": 100.7764892578125, "distillation_loss": 4.313340187072754, "epoch": 3.88, "learning_rate": 3.399549168779938e-05, "loss": 105.0468, "step": 4591, "task_loss": 3.1176624298095703 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9479750464789227, "compression/movement_sparsity/importance_threshold": -0.00036436897846317453, "compression/movement_sparsity/linear_layer_sparsity": 0.8925161968353832, "compression/movement_sparsity/model_sparsity": 0.861855519773112, "compression_loss": 100.78843688964844, "distillation_loss": 6.2348222732543945, "epoch": 3.88, "learning_rate": 3.399079552925707e-05, "loss": 105.7034, "step": 4592, "task_loss": 3.5231194496154785 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9480926612501424, "compression/movement_sparsity/importance_threshold": -0.0003635452357946857, "compression/movement_sparsity/linear_layer_sparsity": 0.8926229181357241, "compression/movement_sparsity/model_sparsity": 0.8619585748684732, "compression_loss": 100.8003921508789, "distillation_loss": 4.526049613952637, "epoch": 3.88, "learning_rate": 3.398609937071476e-05, "loss": 104.7618, "step": 4593, "task_loss": 2.387983560562134 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9482100986235505, "compression/movement_sparsity/importance_threshold": -0.0003627227355734253, "compression/movement_sparsity/linear_layer_sparsity": 0.8926213322214286, "compression/movement_sparsity/model_sparsity": 0.8619570434352125, "compression_loss": 100.81232452392578, "distillation_loss": 5.399662017822266, "epoch": 3.88, "learning_rate": 3.3981403212172444e-05, "loss": 104.5675, "step": 4594, "task_loss": 3.5217273235321045 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9483273587330325, "compression/movement_sparsity/importance_threshold": -0.00036190147686169724, "compression/movement_sparsity/linear_layer_sparsity": 0.8926457052200762, "compression/movement_sparsity/model_sparsity": 0.8619805791463766, "compression_loss": 100.82414245605469, "distillation_loss": 3.3795714378356934, "epoch": 3.88, "learning_rate": 3.397670705363013e-05, "loss": 104.9265, "step": 4595, "task_loss": 1.6177798509597778 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9484444417124736, "compression/movement_sparsity/importance_threshold": -0.00036108145872180454, "compression/movement_sparsity/linear_layer_sparsity": 0.8927784092816958, "compression/movement_sparsity/model_sparsity": 0.862108724415234, "compression_loss": 100.8360366821289, "distillation_loss": 4.153824329376221, "epoch": 3.88, "learning_rate": 3.3972010895087817e-05, "loss": 104.7519, "step": 4596, "task_loss": 3.192978858947754 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9485613476957586, "compression/movement_sparsity/importance_threshold": -0.00036026268021605284, "compression/movement_sparsity/linear_layer_sparsity": 0.8929154179678318, "compression/movement_sparsity/model_sparsity": 0.8622410264315132, "compression_loss": 100.84784698486328, "distillation_loss": 4.537819862365723, "epoch": 3.89, "learning_rate": 3.396731473654551e-05, "loss": 105.7919, "step": 4597, "task_loss": 2.135457992553711 }, { "compression/movement_sparsity/importance_regularization_factor": 0.948678076816773, "compression/movement_sparsity/importance_threshold": -0.0003594451404067443, "compression/movement_sparsity/linear_layer_sparsity": 0.8929555308677589, "compression/movement_sparsity/model_sparsity": 0.8622797613299261, "compression_loss": 100.85968017578125, "distillation_loss": 4.291610240936279, "epoch": 3.89, "learning_rate": 3.3962618578003196e-05, "loss": 105.1975, "step": 4598, "task_loss": 1.4691524505615234 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9487946292094017, "compression/movement_sparsity/importance_threshold": -0.0003586288383561837, "compression/movement_sparsity/linear_layer_sparsity": 0.8930258119118046, "compression/movement_sparsity/model_sparsity": 0.8623476280038992, "compression_loss": 100.87142944335938, "distillation_loss": 4.002674579620361, "epoch": 3.89, "learning_rate": 3.395792241946088e-05, "loss": 104.5335, "step": 4599, "task_loss": 1.9293248653411865 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9489110050075299, "compression/movement_sparsity/importance_threshold": -0.00035781377312667575, "compression/movement_sparsity/linear_layer_sparsity": 0.8931613420011538, "compression/movement_sparsity/model_sparsity": 0.8624785022177399, "compression_loss": 100.88325500488281, "distillation_loss": 5.997797966003418, "epoch": 3.89, "learning_rate": 3.395322626091857e-05, "loss": 106.0315, "step": 4600, "task_loss": 2.6881532669067383 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9490272043450427, "compression/movement_sparsity/importance_threshold": -0.00035699994378052356, "compression/movement_sparsity/linear_layer_sparsity": 0.8932217856069, "compression/movement_sparsity/model_sparsity": 0.8625368693996824, "compression_loss": 100.89498901367188, "distillation_loss": 4.720440864562988, "epoch": 3.89, "learning_rate": 3.3948530102376255e-05, "loss": 105.6037, "step": 4601, "task_loss": 3.422818422317505 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9491432273558256, "compression/movement_sparsity/importance_threshold": -0.0003561873493800301, "compression/movement_sparsity/linear_layer_sparsity": 0.8932982433697813, "compression/movement_sparsity/model_sparsity": 0.862610700603197, "compression_loss": 100.90672302246094, "distillation_loss": 3.991898536682129, "epoch": 3.89, "learning_rate": 3.394383394383395e-05, "loss": 105.1803, "step": 4602, "task_loss": 2.160602569580078 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9492590741737632, "compression/movement_sparsity/importance_threshold": -0.00035537598898750106, "compression/movement_sparsity/linear_layer_sparsity": 0.8933128981718057, "compression/movement_sparsity/model_sparsity": 0.8626248519676885, "compression_loss": 100.91842651367188, "distillation_loss": 3.972752332687378, "epoch": 3.89, "learning_rate": 3.393913778529163e-05, "loss": 104.5996, "step": 4603, "task_loss": 1.4807106256484985 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9493747449327409, "compression/movement_sparsity/importance_threshold": -0.00035456586166524116, "compression/movement_sparsity/linear_layer_sparsity": 0.8934116302798306, "compression/movement_sparsity/model_sparsity": 0.8627201923240673, "compression_loss": 100.93008422851562, "distillation_loss": 3.737037420272827, "epoch": 3.89, "learning_rate": 3.393444162674932e-05, "loss": 105.2679, "step": 4604, "task_loss": 3.6903860569000244 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9494902397666438, "compression/movement_sparsity/importance_threshold": -0.0003537569664755517, "compression/movement_sparsity/linear_layer_sparsity": 0.8934162330085381, "compression/movement_sparsity/model_sparsity": 0.862724636934884, "compression_loss": 100.9417953491211, "distillation_loss": 4.999066352844238, "epoch": 3.89, "learning_rate": 3.392974546820701e-05, "loss": 105.469, "step": 4605, "task_loss": 2.1755032539367676 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9496055588093572, "compression/movement_sparsity/importance_threshold": -0.00035294930248073834, "compression/movement_sparsity/linear_layer_sparsity": 0.8934778571068802, "compression/movement_sparsity/model_sparsity": 0.8627841440558702, "compression_loss": 100.9533920288086, "distillation_loss": 3.834989547729492, "epoch": 3.89, "learning_rate": 3.39250493096647e-05, "loss": 105.0337, "step": 4606, "task_loss": 3.3596999645233154 }, { "compression/movement_sparsity/importance_regularization_factor": 0.949720702194766, "compression/movement_sparsity/importance_threshold": -0.00035214286874310496, "compression/movement_sparsity/linear_layer_sparsity": 0.8935577371058728, "compression/movement_sparsity/model_sparsity": 0.8628612799311578, "compression_loss": 100.96499633789062, "distillation_loss": 6.507474899291992, "epoch": 3.89, "learning_rate": 3.392035315112239e-05, "loss": 106.0033, "step": 4607, "task_loss": 4.008217811584473 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9498356700567555, "compression/movement_sparsity/importance_threshold": -0.00035133766432495547, "compression/movement_sparsity/linear_layer_sparsity": 0.8936075562782555, "compression/movement_sparsity/model_sparsity": 0.8629093876617074, "compression_loss": 100.9765853881836, "distillation_loss": 4.785038948059082, "epoch": 3.89, "learning_rate": 3.3915656992580066e-05, "loss": 104.9709, "step": 4608, "task_loss": 2.398371934890747 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9499504625292108, "compression/movement_sparsity/importance_threshold": -0.0003505336882885929, "compression/movement_sparsity/linear_layer_sparsity": 0.8936079497757875, "compression/movement_sparsity/model_sparsity": 0.8629097676413886, "compression_loss": 100.98819732666016, "distillation_loss": 5.563446998596191, "epoch": 3.9, "learning_rate": 3.391096083403776e-05, "loss": 105.7525, "step": 4609, "task_loss": 2.4296858310699463 }, { "compression/movement_sparsity/importance_regularization_factor": 0.950065079746017, "compression/movement_sparsity/importance_threshold": -0.00034973093969632284, "compression/movement_sparsity/linear_layer_sparsity": 0.8936861365429758, "compression/movement_sparsity/model_sparsity": 0.8629852684525934, "compression_loss": 100.99979400634766, "distillation_loss": 3.281094551086426, "epoch": 3.9, "learning_rate": 3.3906264675495446e-05, "loss": 105.1892, "step": 4610, "task_loss": 1.359919548034668 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9501795218410594, "compression/movement_sparsity/importance_threshold": -0.0003489294176104475, "compression/movement_sparsity/linear_layer_sparsity": 0.8937487264988965, "compression/movement_sparsity/model_sparsity": 0.8630457082509789, "compression_loss": 101.01136779785156, "distillation_loss": 4.986927509307861, "epoch": 3.9, "learning_rate": 3.390156851695314e-05, "loss": 105.0252, "step": 4611, "task_loss": 3.737544536590576 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9502937889482228, "compression/movement_sparsity/importance_threshold": -0.0003481291210932734, "compression/movement_sparsity/linear_layer_sparsity": 0.8938543626999826, "compression/movement_sparsity/model_sparsity": 0.8631477155235828, "compression_loss": 101.02296447753906, "distillation_loss": 5.24118709564209, "epoch": 3.9, "learning_rate": 3.389687235841082e-05, "loss": 105.4196, "step": 4612, "task_loss": 3.7000200748443604 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9504078812013926, "compression/movement_sparsity/importance_threshold": -0.0003473300492071018, "compression/movement_sparsity/linear_layer_sparsity": 0.8938983986510618, "compression/movement_sparsity/model_sparsity": 0.863190238704272, "compression_loss": 101.03450775146484, "distillation_loss": 5.289562225341797, "epoch": 3.9, "learning_rate": 3.389217619986851e-05, "loss": 106.3182, "step": 4613, "task_loss": 3.251208782196045 }, { "compression/movement_sparsity/importance_regularization_factor": 0.950521798734454, "compression/movement_sparsity/importance_threshold": -0.00034653220101423833, "compression/movement_sparsity/linear_layer_sparsity": 0.8939816293411602, "compression/movement_sparsity/model_sparsity": 0.863270610164118, "compression_loss": 101.04598236083984, "distillation_loss": 5.181283473968506, "epoch": 3.9, "learning_rate": 3.38874800413262e-05, "loss": 105.5977, "step": 4614, "task_loss": 3.2465696334838867 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9506355416812919, "compression/movement_sparsity/importance_threshold": -0.0003457355755769869, "compression/movement_sparsity/linear_layer_sparsity": 0.8939361386416294, "compression/movement_sparsity/model_sparsity": 0.8632266822100618, "compression_loss": 101.05746459960938, "distillation_loss": 3.699388027191162, "epoch": 3.9, "learning_rate": 3.3882783882783884e-05, "loss": 104.9557, "step": 4615, "task_loss": 2.6295366287231445 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9507491101757917, "compression/movement_sparsity/importance_threshold": -0.00034494017195765055, "compression/movement_sparsity/linear_layer_sparsity": 0.8938660483842658, "compression/movement_sparsity/model_sparsity": 0.8631589997686615, "compression_loss": 101.0689697265625, "distillation_loss": 4.068443298339844, "epoch": 3.9, "learning_rate": 3.387808772424157e-05, "loss": 105.0924, "step": 4616, "task_loss": 1.592682957649231 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9508625043518383, "compression/movement_sparsity/importance_threshold": -0.00034414598921853404, "compression/movement_sparsity/linear_layer_sparsity": 0.8939132680881038, "compression/movement_sparsity/model_sparsity": 0.8632045973304079, "compression_loss": 101.08039855957031, "distillation_loss": 4.861985683441162, "epoch": 3.9, "learning_rate": 3.387339156569926e-05, "loss": 106.1402, "step": 4617, "task_loss": 2.5748844146728516 }, { "compression/movement_sparsity/importance_regularization_factor": 0.950975724343317, "compression/movement_sparsity/importance_threshold": -0.0003433530264219404, "compression/movement_sparsity/linear_layer_sparsity": 0.8939078068193266, "compression/movement_sparsity/model_sparsity": 0.863199323673014, "compression_loss": 101.09178161621094, "distillation_loss": 5.338858604431152, "epoch": 3.9, "learning_rate": 3.386869540715695e-05, "loss": 105.9183, "step": 4618, "task_loss": 3.4013051986694336 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9510887702841129, "compression/movement_sparsity/importance_threshold": -0.00034256128263017525, "compression/movement_sparsity/linear_layer_sparsity": 0.8939497083443989, "compression/movement_sparsity/model_sparsity": 0.8632397857517959, "compression_loss": 101.10321044921875, "distillation_loss": 5.493746757507324, "epoch": 3.9, "learning_rate": 3.3863999248614636e-05, "loss": 105.695, "step": 4619, "task_loss": 3.2099924087524414 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9512016423081112, "compression/movement_sparsity/importance_threshold": -0.0003417707569055399, "compression/movement_sparsity/linear_layer_sparsity": 0.894004917240553, "compression/movement_sparsity/model_sparsity": 0.8632930980525246, "compression_loss": 101.11459350585938, "distillation_loss": 5.106605052947998, "epoch": 3.9, "learning_rate": 3.385930309007232e-05, "loss": 105.6518, "step": 4620, "task_loss": 2.951779842376709 }, { "compression/movement_sparsity/importance_regularization_factor": 0.951314340549197, "compression/movement_sparsity/importance_threshold": -0.00034098144831034086, "compression/movement_sparsity/linear_layer_sparsity": 0.8940775711939584, "compression/movement_sparsity/model_sparsity": 0.8633632561191208, "compression_loss": 101.1259765625, "distillation_loss": 4.7546234130859375, "epoch": 3.91, "learning_rate": 3.385460693153001e-05, "loss": 105.4049, "step": 4621, "task_loss": 2.6324379444122314 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9514268651412552, "compression/movement_sparsity/importance_threshold": -0.0003401933559068829, "compression/movement_sparsity/linear_layer_sparsity": 0.8941145003411266, "compression/movement_sparsity/model_sparsity": 0.8633989166364765, "compression_loss": 101.13729095458984, "distillation_loss": 4.950292587280273, "epoch": 3.91, "learning_rate": 3.3849910772987695e-05, "loss": 105.8242, "step": 4622, "task_loss": 2.4742188453674316 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9515392162181713, "compression/movement_sparsity/importance_threshold": -0.00033940647875746726, "compression/movement_sparsity/linear_layer_sparsity": 0.8942456781092887, "compression/movement_sparsity/model_sparsity": 0.8635255880447522, "compression_loss": 101.14863586425781, "distillation_loss": 4.755215644836426, "epoch": 3.91, "learning_rate": 3.384521461444539e-05, "loss": 105.9388, "step": 4623, "task_loss": 2.502890110015869 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9516513939138304, "compression/movement_sparsity/importance_threshold": -0.0003386208159243979, "compression/movement_sparsity/linear_layer_sparsity": 0.8942765259309627, "compression/movement_sparsity/model_sparsity": 0.8635553761488527, "compression_loss": 101.15995788574219, "distillation_loss": 5.041110992431641, "epoch": 3.91, "learning_rate": 3.3840518455903075e-05, "loss": 105.6302, "step": 4624, "task_loss": 2.496145009994507 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9517633983621173, "compression/movement_sparsity/importance_threshold": -0.0003378363664699813, "compression/movement_sparsity/linear_layer_sparsity": 0.8944004895777051, "compression/movement_sparsity/model_sparsity": 0.8636750812629728, "compression_loss": 101.1712646484375, "distillation_loss": 4.949995040893555, "epoch": 3.91, "learning_rate": 3.383582229736076e-05, "loss": 105.3401, "step": 4625, "task_loss": 2.899430990219116 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9518752296969175, "compression/movement_sparsity/importance_threshold": -0.00033705312945651964, "compression/movement_sparsity/linear_layer_sparsity": 0.8945225811301287, "compression/movement_sparsity/model_sparsity": 0.8637929785949731, "compression_loss": 101.18251037597656, "distillation_loss": 4.48978328704834, "epoch": 3.91, "learning_rate": 3.383112613881845e-05, "loss": 105.5277, "step": 4626, "task_loss": 1.7026641368865967 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9519868880521161, "compression/movement_sparsity/importance_threshold": -0.00033627110394631766, "compression/movement_sparsity/linear_layer_sparsity": 0.8946228037591082, "compression/movement_sparsity/model_sparsity": 0.8638897582683261, "compression_loss": 101.19380187988281, "distillation_loss": 5.110532760620117, "epoch": 3.91, "learning_rate": 3.3826429980276134e-05, "loss": 106.3328, "step": 4627, "task_loss": 2.773049831390381 }, { "compression/movement_sparsity/importance_regularization_factor": 0.952098373561598, "compression/movement_sparsity/importance_threshold": -0.0003354902890016793, "compression/movement_sparsity/linear_layer_sparsity": 0.894765059079004, "compression/movement_sparsity/model_sparsity": 0.8640271266803551, "compression_loss": 101.205078125, "distillation_loss": 4.636569976806641, "epoch": 3.91, "learning_rate": 3.382173382173383e-05, "loss": 104.9572, "step": 4628, "task_loss": 2.68298077583313 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9522096863592486, "compression/movement_sparsity/importance_threshold": -0.0003347106836849075, "compression/movement_sparsity/linear_layer_sparsity": 0.8948279590632832, "compression/movement_sparsity/model_sparsity": 0.8640878658566713, "compression_loss": 101.2163314819336, "distillation_loss": 6.372221946716309, "epoch": 3.91, "learning_rate": 3.3817037663191506e-05, "loss": 106.0487, "step": 4629, "task_loss": 3.299360513687134 }, { "compression/movement_sparsity/importance_regularization_factor": 0.952320826578953, "compression/movement_sparsity/importance_threshold": -0.00033393228705830714, "compression/movement_sparsity/linear_layer_sparsity": 0.8948857435796466, "compression/movement_sparsity/model_sparsity": 0.8641436652971316, "compression_loss": 101.22756958007812, "distillation_loss": 4.611349582672119, "epoch": 3.91, "learning_rate": 3.38123415046492e-05, "loss": 106.4568, "step": 4630, "task_loss": 2.840118646621704 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9524317943545961, "compression/movement_sparsity/importance_threshold": -0.00033315509818418205, "compression/movement_sparsity/linear_layer_sparsity": 0.8949338814443926, "compression/movement_sparsity/model_sparsity": 0.8641901494781342, "compression_loss": 101.23886108398438, "distillation_loss": 4.797815322875977, "epoch": 3.91, "learning_rate": 3.3807645346106886e-05, "loss": 105.6075, "step": 4631, "task_loss": 2.321150302886963 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9525425898200633, "compression/movement_sparsity/importance_threshold": -0.000332379116124837, "compression/movement_sparsity/linear_layer_sparsity": 0.8949515530608289, "compression/movement_sparsity/model_sparsity": 0.8642072140201816, "compression_loss": 101.25005340576172, "distillation_loss": 5.359033584594727, "epoch": 3.91, "learning_rate": 3.380294918756457e-05, "loss": 105.8669, "step": 4632, "task_loss": 2.8965680599212646 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9526532131092398, "compression/movement_sparsity/importance_threshold": -0.0003316043399425742, "compression/movement_sparsity/linear_layer_sparsity": 0.8950072389236884, "compression/movement_sparsity/model_sparsity": 0.8642609869023422, "compression_loss": 101.26126861572266, "distillation_loss": 6.089877128601074, "epoch": 3.92, "learning_rate": 3.379825302902226e-05, "loss": 105.6845, "step": 4633, "task_loss": 2.9919357299804688 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9527636643560105, "compression/movement_sparsity/importance_threshold": -0.0003308307686996992, "compression/movement_sparsity/linear_layer_sparsity": 0.8950930690823313, "compression/movement_sparsity/model_sparsity": 0.8643438685309913, "compression_loss": 101.27244567871094, "distillation_loss": 5.567140579223633, "epoch": 3.92, "learning_rate": 3.3793556870479945e-05, "loss": 105.5333, "step": 4634, "task_loss": 2.6638662815093994 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9528739436942606, "compression/movement_sparsity/importance_threshold": -0.00033005840145851513, "compression/movement_sparsity/linear_layer_sparsity": 0.8951400741511519, "compression/movement_sparsity/model_sparsity": 0.8643892588310934, "compression_loss": 101.28358459472656, "distillation_loss": 3.47590970993042, "epoch": 3.92, "learning_rate": 3.378886071193764e-05, "loss": 106.1557, "step": 4635, "task_loss": 2.424560308456421 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9529840512578753, "compression/movement_sparsity/importance_threshold": -0.0003292872372813267, "compression/movement_sparsity/linear_layer_sparsity": 0.8952808747225962, "compression/movement_sparsity/model_sparsity": 0.8645252224697553, "compression_loss": 101.29469299316406, "distillation_loss": 3.531162738800049, "epoch": 3.92, "learning_rate": 3.3784164553395324e-05, "loss": 105.9478, "step": 4636, "task_loss": 1.9088877439498901 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9530939871807399, "compression/movement_sparsity/importance_threshold": -0.00032851727523043606, "compression/movement_sparsity/linear_layer_sparsity": 0.8953318743875748, "compression/movement_sparsity/model_sparsity": 0.8645744701393485, "compression_loss": 101.30580139160156, "distillation_loss": 3.3463287353515625, "epoch": 3.92, "learning_rate": 3.377946839485302e-05, "loss": 105.5874, "step": 4637, "task_loss": 2.1914350986480713 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9532037515967392, "compression/movement_sparsity/importance_threshold": -0.00032774851436814974, "compression/movement_sparsity/linear_layer_sparsity": 0.8953269974030117, "compression/movement_sparsity/model_sparsity": 0.8645697606942085, "compression_loss": 101.31686401367188, "distillation_loss": 4.6450605392456055, "epoch": 3.92, "learning_rate": 3.37747722363107e-05, "loss": 106.0068, "step": 4638, "task_loss": 3.1419427394866943 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9533133446397585, "compression/movement_sparsity/importance_threshold": -0.00032698095375677076, "compression/movement_sparsity/linear_layer_sparsity": 0.8953634257351393, "compression/movement_sparsity/model_sparsity": 0.8646049376010609, "compression_loss": 101.32795715332031, "distillation_loss": 6.041463851928711, "epoch": 3.92, "learning_rate": 3.377007607776838e-05, "loss": 106.3565, "step": 4639, "task_loss": 2.350584030151367 }, { "compression/movement_sparsity/importance_regularization_factor": 0.953422766443683, "compression/movement_sparsity/importance_threshold": -0.00032621459245860216, "compression/movement_sparsity/linear_layer_sparsity": 0.8954240601275676, "compression/movement_sparsity/model_sparsity": 0.8646634890155762, "compression_loss": 101.33897399902344, "distillation_loss": 5.515351295471191, "epoch": 3.92, "learning_rate": 3.3765379919226076e-05, "loss": 106.0872, "step": 4640, "task_loss": 2.9922573566436768 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9535320171423978, "compression/movement_sparsity/importance_threshold": -0.0003254494295359487, "compression/movement_sparsity/linear_layer_sparsity": 0.8954567919677281, "compression/movement_sparsity/model_sparsity": 0.8646950964163321, "compression_loss": 101.34993743896484, "distillation_loss": 4.323127269744873, "epoch": 3.92, "learning_rate": 3.376068376068376e-05, "loss": 105.6536, "step": 4641, "task_loss": 1.8231791257858276 }, { "compression/movement_sparsity/importance_regularization_factor": 0.953641096869788, "compression/movement_sparsity/importance_threshold": -0.0003246854640511134, "compression/movement_sparsity/linear_layer_sparsity": 0.8955853941156808, "compression/movement_sparsity/model_sparsity": 0.8648192806848763, "compression_loss": 101.36093139648438, "distillation_loss": 5.5626044273376465, "epoch": 3.92, "learning_rate": 3.375598760214145e-05, "loss": 105.8588, "step": 4642, "task_loss": 1.7434442043304443 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9537500057597388, "compression/movement_sparsity/importance_threshold": -0.0003239226950664028, "compression/movement_sparsity/linear_layer_sparsity": 0.89563008589598, "compression/movement_sparsity/model_sparsity": 0.8648624371650342, "compression_loss": 101.37188720703125, "distillation_loss": 3.5114352703094482, "epoch": 3.92, "learning_rate": 3.3751291443599136e-05, "loss": 105.8041, "step": 4643, "task_loss": 1.7396771907806396 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9538587439461352, "compression/movement_sparsity/importance_threshold": -0.000323161121644119, "compression/movement_sparsity/linear_layer_sparsity": 0.8955506947878604, "compression/movement_sparsity/model_sparsity": 0.8647857733857142, "compression_loss": 101.38284301757812, "distillation_loss": 4.426527976989746, "epoch": 3.93, "learning_rate": 3.374659528505683e-05, "loss": 105.6584, "step": 4644, "task_loss": 2.5360474586486816 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9539673115628625, "compression/movement_sparsity/importance_threshold": -0.000322400742846566, "compression/movement_sparsity/linear_layer_sparsity": 0.8954914793713807, "compression/movement_sparsity/model_sparsity": 0.8647285922009585, "compression_loss": 101.393798828125, "distillation_loss": 5.229212760925293, "epoch": 3.93, "learning_rate": 3.3741899126514515e-05, "loss": 106.0822, "step": 4645, "task_loss": 3.2477760314941406 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9540757087438059, "compression/movement_sparsity/importance_threshold": -0.00032164155773604756, "compression/movement_sparsity/linear_layer_sparsity": 0.89551049841876, "compression/movement_sparsity/model_sparsity": 0.8647469578855508, "compression_loss": 101.40473175048828, "distillation_loss": 3.6485395431518555, "epoch": 3.93, "learning_rate": 3.37372029679722e-05, "loss": 105.7291, "step": 4646, "task_loss": 2.2173216342926025 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9541839356228503, "compression/movement_sparsity/importance_threshold": -0.00032088356537486856, "compression/movement_sparsity/linear_layer_sparsity": 0.8956782953057318, "compression/movement_sparsity/model_sparsity": 0.8649089904332515, "compression_loss": 101.41555786132812, "distillation_loss": 5.659440040588379, "epoch": 3.93, "learning_rate": 3.373250680942989e-05, "loss": 105.5045, "step": 4647, "task_loss": 2.8491249084472656 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9542919923338811, "compression/movement_sparsity/importance_threshold": -0.000320126764825332, "compression/movement_sparsity/linear_layer_sparsity": 0.8957486001981129, "compression/movement_sparsity/model_sparsity": 0.8649768801362961, "compression_loss": 101.42647552490234, "distillation_loss": 5.4664483070373535, "epoch": 3.93, "learning_rate": 3.3727810650887574e-05, "loss": 106.3987, "step": 4648, "task_loss": 3.7156589031219482 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9543998790107833, "compression/movement_sparsity/importance_threshold": -0.00031937115514974173, "compression/movement_sparsity/linear_layer_sparsity": 0.8958402374263945, "compression/movement_sparsity/model_sparsity": 0.8650653693438772, "compression_loss": 101.43732452392578, "distillation_loss": 4.4118332862854, "epoch": 3.93, "learning_rate": 3.372311449234527e-05, "loss": 105.9383, "step": 4649, "task_loss": 1.658392310142517 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9545075957874419, "compression/movement_sparsity/importance_threshold": -0.00031861673541040345, "compression/movement_sparsity/linear_layer_sparsity": 0.8958693443195936, "compression/movement_sparsity/model_sparsity": 0.8650934763257517, "compression_loss": 101.4481201171875, "distillation_loss": 6.654323577880859, "epoch": 3.93, "learning_rate": 3.3718418333802953e-05, "loss": 106.2466, "step": 4650, "task_loss": 3.152669668197632 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9546151427977424, "compression/movement_sparsity/importance_threshold": -0.0003178635046696193, "compression/movement_sparsity/linear_layer_sparsity": 0.8959442400165145, "compression/movement_sparsity/model_sparsity": 0.8651657991250772, "compression_loss": 101.45892333984375, "distillation_loss": 3.328441619873047, "epoch": 3.93, "learning_rate": 3.371372217526064e-05, "loss": 106.0105, "step": 4651, "task_loss": 1.8803482055664062 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9547225201755696, "compression/movement_sparsity/importance_threshold": -0.000317111461989694, "compression/movement_sparsity/linear_layer_sparsity": 0.8959275700301595, "compression/movement_sparsity/model_sparsity": 0.8651497018040364, "compression_loss": 101.46969604492188, "distillation_loss": 5.105778217315674, "epoch": 3.93, "learning_rate": 3.3709026016718326e-05, "loss": 106.0904, "step": 4652, "task_loss": 2.0406389236450195 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9548297280548088, "compression/movement_sparsity/importance_threshold": -0.0003163606064329324, "compression/movement_sparsity/linear_layer_sparsity": 0.896016166595694, "compression/movement_sparsity/model_sparsity": 0.8652352548049899, "compression_loss": 101.48047637939453, "distillation_loss": 5.9561262130737305, "epoch": 3.93, "learning_rate": 3.370432985817601e-05, "loss": 106.2431, "step": 4653, "task_loss": 3.019451379776001 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9549367665693451, "compression/movement_sparsity/importance_threshold": -0.00031561093706163664, "compression/movement_sparsity/linear_layer_sparsity": 0.8960252289630973, "compression/movement_sparsity/model_sparsity": 0.8652440058521937, "compression_loss": 101.49125671386719, "distillation_loss": 5.095379829406738, "epoch": 3.93, "learning_rate": 3.3699633699633706e-05, "loss": 105.8268, "step": 4654, "task_loss": 3.170586585998535 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9550436358530636, "compression/movement_sparsity/importance_threshold": -0.0003148624529381132, "compression/movement_sparsity/linear_layer_sparsity": 0.896153962276894, "compression/movement_sparsity/model_sparsity": 0.8653683167806316, "compression_loss": 101.5019302368164, "distillation_loss": 5.1330485343933105, "epoch": 3.93, "learning_rate": 3.3694937541091385e-05, "loss": 106.0014, "step": 4655, "task_loss": 2.6393909454345703 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9551503360398497, "compression/movement_sparsity/importance_threshold": -0.0003141151531246625, "compression/movement_sparsity/linear_layer_sparsity": 0.896142503151796, "compression/movement_sparsity/model_sparsity": 0.865357251311733, "compression_loss": 101.51266479492188, "distillation_loss": 5.0952959060668945, "epoch": 3.94, "learning_rate": 3.369024138254908e-05, "loss": 106.3751, "step": 4656, "task_loss": 3.0116846561431885 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9552568672635882, "compression/movement_sparsity/importance_threshold": -0.0003133690366835919, "compression/movement_sparsity/linear_layer_sparsity": 0.8961847743260651, "compression/movement_sparsity/model_sparsity": 0.8653980703411247, "compression_loss": 101.52335357666016, "distillation_loss": 3.407650947570801, "epoch": 3.94, "learning_rate": 3.3685545224006765e-05, "loss": 105.9285, "step": 4657, "task_loss": 2.41762375831604 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9553632296581643, "compression/movement_sparsity/importance_threshold": -0.0003126241026772036, "compression/movement_sparsity/linear_layer_sparsity": 0.8962495344804955, "compression/movement_sparsity/model_sparsity": 0.8654606057850248, "compression_loss": 101.53398132324219, "distillation_loss": 5.331636428833008, "epoch": 3.94, "learning_rate": 3.368084906546445e-05, "loss": 105.6985, "step": 4658, "task_loss": 3.657886505126953 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9554694233574633, "compression/movement_sparsity/importance_threshold": -0.0003118803501678015, "compression/movement_sparsity/linear_layer_sparsity": 0.8962886934470117, "compression/movement_sparsity/model_sparsity": 0.8654984195205742, "compression_loss": 101.54467010498047, "distillation_loss": 5.184813499450684, "epoch": 3.94, "learning_rate": 3.367615290692214e-05, "loss": 106.2971, "step": 4659, "task_loss": 3.0214946269989014 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9555754484953703, "compression/movement_sparsity/importance_threshold": -0.00031113777821769033, "compression/movement_sparsity/linear_layer_sparsity": 0.8963683707351545, "compression/movement_sparsity/model_sparsity": 0.8655753596487532, "compression_loss": 101.5553207397461, "distillation_loss": 3.6411643028259277, "epoch": 3.94, "learning_rate": 3.3671456748379824e-05, "loss": 105.1734, "step": 4660, "task_loss": 1.627065896987915 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9556813052057704, "compression/movement_sparsity/importance_threshold": -0.000310396385889174, "compression/movement_sparsity/linear_layer_sparsity": 0.8964309010702369, "compression/movement_sparsity/model_sparsity": 0.8656357418744598, "compression_loss": 101.56591033935547, "distillation_loss": 6.332613468170166, "epoch": 3.94, "learning_rate": 3.366676058983752e-05, "loss": 106.0168, "step": 4661, "task_loss": 3.449110269546509 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9557869936225487, "compression/movement_sparsity/importance_threshold": -0.00030965617224455646, "compression/movement_sparsity/linear_layer_sparsity": 0.8964505878710037, "compression/movement_sparsity/model_sparsity": 0.8656547523730566, "compression_loss": 101.57649993896484, "distillation_loss": 2.3319077491760254, "epoch": 3.94, "learning_rate": 3.36620644312952e-05, "loss": 105.6631, "step": 4662, "task_loss": 1.0662107467651367 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9558925138795905, "compression/movement_sparsity/importance_threshold": -0.0003089171363461407, "compression/movement_sparsity/linear_layer_sparsity": 0.896442491361179, "compression/movement_sparsity/model_sparsity": 0.8656469340032521, "compression_loss": 101.58708190917969, "distillation_loss": 2.981451988220215, "epoch": 3.94, "learning_rate": 3.365736827275289e-05, "loss": 105.858, "step": 4663, "task_loss": 2.2606122493743896 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9559978661107807, "compression/movement_sparsity/importance_threshold": -0.0003081792772562323, "compression/movement_sparsity/linear_layer_sparsity": 0.8964808275601284, "compression/movement_sparsity/model_sparsity": 0.8656839532358316, "compression_loss": 101.59762573242188, "distillation_loss": 4.395840167999268, "epoch": 3.94, "learning_rate": 3.3652672114210576e-05, "loss": 105.6962, "step": 4664, "task_loss": 1.8972145318984985 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9561030504500045, "compression/movement_sparsity/importance_threshold": -0.00030744259403713437, "compression/movement_sparsity/linear_layer_sparsity": 0.8965512278458505, "compression/movement_sparsity/model_sparsity": 0.8657519350551626, "compression_loss": 101.60818481445312, "distillation_loss": 5.024073600769043, "epoch": 3.94, "learning_rate": 3.364797595566826e-05, "loss": 105.8219, "step": 4665, "task_loss": 3.489093542098999 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9562080670311472, "compression/movement_sparsity/importance_threshold": -0.0003067070857511508, "compression/movement_sparsity/linear_layer_sparsity": 0.8966131142758806, "compression/movement_sparsity/model_sparsity": 0.8658116954959362, "compression_loss": 101.61870574951172, "distillation_loss": 4.183615207672119, "epoch": 3.94, "learning_rate": 3.3643279797125955e-05, "loss": 105.8607, "step": 4666, "task_loss": 1.938394546508789 }, { "compression/movement_sparsity/importance_regularization_factor": 0.956312915988094, "compression/movement_sparsity/importance_threshold": -0.00030597275146058544, "compression/movement_sparsity/linear_layer_sparsity": 0.8966769920419059, "compression/movement_sparsity/model_sparsity": 0.8658733788641876, "compression_loss": 101.62920379638672, "distillation_loss": 3.818817615509033, "epoch": 3.94, "learning_rate": 3.363858363858364e-05, "loss": 105.7471, "step": 4667, "task_loss": 2.4525699615478516 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9564175974547297, "compression/movement_sparsity/importance_threshold": -0.0003052395902277431, "compression/movement_sparsity/linear_layer_sparsity": 0.8966454168460062, "compression/movement_sparsity/model_sparsity": 0.8658428883734036, "compression_loss": 101.63967895507812, "distillation_loss": 3.6939475536346436, "epoch": 3.95, "learning_rate": 3.363388748004133e-05, "loss": 105.8869, "step": 4668, "task_loss": 2.4718070030212402 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9565221115649396, "compression/movement_sparsity/importance_threshold": -0.00030450760111492767, "compression/movement_sparsity/linear_layer_sparsity": 0.8966742494833496, "compression/movement_sparsity/model_sparsity": 0.8658707305209548, "compression_loss": 101.65015411376953, "distillation_loss": 3.8367724418640137, "epoch": 3.95, "learning_rate": 3.3629191321499014e-05, "loss": 106.0676, "step": 4669, "task_loss": 2.6185972690582275 }, { "compression/movement_sparsity/importance_regularization_factor": 0.956626458452609, "compression/movement_sparsity/importance_threshold": -0.0003037767831844413, "compression/movement_sparsity/linear_layer_sparsity": 0.8967708948620383, "compression/movement_sparsity/model_sparsity": 0.8659640558335695, "compression_loss": 101.66059112548828, "distillation_loss": 5.730309963226318, "epoch": 3.95, "learning_rate": 3.362449516295671e-05, "loss": 105.5365, "step": 4670, "task_loss": 2.2513010501861572 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9567306382516227, "compression/movement_sparsity/importance_threshold": -0.0003030471354985905, "compression/movement_sparsity/linear_layer_sparsity": 0.8968961343947178, "compression/movement_sparsity/model_sparsity": 0.8660849930030196, "compression_loss": 101.67102813720703, "distillation_loss": 4.1641693115234375, "epoch": 3.95, "learning_rate": 3.3619799004414394e-05, "loss": 106.0015, "step": 4671, "task_loss": 2.709794044494629 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9568346510958662, "compression/movement_sparsity/importance_threshold": -0.0003023186571196783, "compression/movement_sparsity/linear_layer_sparsity": 0.8970167115778517, "compression/movement_sparsity/model_sparsity": 0.866201427988974, "compression_loss": 101.68143463134766, "distillation_loss": 3.810199022293091, "epoch": 3.95, "learning_rate": 3.361510284587207e-05, "loss": 106.0043, "step": 4672, "task_loss": 2.0786898136138916 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9569384971192245, "compression/movement_sparsity/importance_threshold": -0.00030159134711000776, "compression/movement_sparsity/linear_layer_sparsity": 0.8970608906189426, "compression/movement_sparsity/model_sparsity": 0.8662440893440928, "compression_loss": 101.69183349609375, "distillation_loss": 5.2797956466674805, "epoch": 3.95, "learning_rate": 3.3610406687329766e-05, "loss": 105.9176, "step": 4673, "task_loss": 3.310303211212158 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9570421764555826, "compression/movement_sparsity/importance_threshold": -0.0003008652045318836, "compression/movement_sparsity/linear_layer_sparsity": 0.8971112225305335, "compression/movement_sparsity/model_sparsity": 0.8662926921996815, "compression_loss": 101.70219421386719, "distillation_loss": 3.690847873687744, "epoch": 3.95, "learning_rate": 3.360571052878745e-05, "loss": 105.8331, "step": 4674, "task_loss": 1.8293949365615845 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9571456892388259, "compression/movement_sparsity/importance_threshold": -0.00030014022844760974, "compression/movement_sparsity/linear_layer_sparsity": 0.8971754818699231, "compression/movement_sparsity/model_sparsity": 0.8663547440330783, "compression_loss": 101.71257019042969, "distillation_loss": 4.485530853271484, "epoch": 3.95, "learning_rate": 3.3601014370245146e-05, "loss": 106.0385, "step": 4675, "task_loss": 2.748704671859741 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9572490356028394, "compression/movement_sparsity/importance_threshold": -0.00029941641791949095, "compression/movement_sparsity/linear_layer_sparsity": 0.89720132154119, "compression/movement_sparsity/model_sparsity": 0.866379696032145, "compression_loss": 101.72290802001953, "distillation_loss": 4.996626853942871, "epoch": 3.95, "learning_rate": 3.3596318211702825e-05, "loss": 105.9135, "step": 4676, "task_loss": 2.5895516872406006 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9573522156815082, "compression/movement_sparsity/importance_threshold": -0.00029869377200983024, "compression/movement_sparsity/linear_layer_sparsity": 0.8972854465438611, "compression/movement_sparsity/model_sparsity": 0.8664609310821756, "compression_loss": 101.73324584960938, "distillation_loss": 2.9844400882720947, "epoch": 3.95, "learning_rate": 3.359162205316052e-05, "loss": 105.5686, "step": 4677, "task_loss": 2.039533853530884 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9574552296087175, "compression/movement_sparsity/importance_threshold": -0.00029797228978093154, "compression/movement_sparsity/linear_layer_sparsity": 0.8972777316074007, "compression/movement_sparsity/model_sparsity": 0.8664534811775165, "compression_loss": 101.74354553222656, "distillation_loss": 4.03771448135376, "epoch": 3.95, "learning_rate": 3.3586925894618205e-05, "loss": 105.8347, "step": 4678, "task_loss": 2.953936815261841 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9575580775183525, "compression/movement_sparsity/importance_threshold": -0.0002972519702950987, "compression/movement_sparsity/linear_layer_sparsity": 0.8973379486539618, "compression/movement_sparsity/model_sparsity": 0.866511629583279, "compression_loss": 101.75387573242188, "distillation_loss": 4.7701311111450195, "epoch": 3.95, "learning_rate": 3.358222973607589e-05, "loss": 105.8671, "step": 4679, "task_loss": 2.3057758808135986 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9576607595442982, "compression/movement_sparsity/importance_threshold": -0.00029653281261463656, "compression/movement_sparsity/linear_layer_sparsity": 0.8974160161794738, "compression/movement_sparsity/model_sparsity": 0.8665870152491257, "compression_loss": 101.76419830322266, "distillation_loss": 4.448347568511963, "epoch": 3.96, "learning_rate": 3.357753357753358e-05, "loss": 105.7598, "step": 4680, "task_loss": 2.425097942352295 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9577632758204399, "compression/movement_sparsity/importance_threshold": -0.0002958148158018481, "compression/movement_sparsity/linear_layer_sparsity": 0.8976089253634868, "compression/movement_sparsity/model_sparsity": 0.8667732974092098, "compression_loss": 101.77444458007812, "distillation_loss": 5.4012274742126465, "epoch": 3.96, "learning_rate": 3.3572837418991264e-05, "loss": 106.2318, "step": 4681, "task_loss": 2.782803535461426 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9578656264806625, "compression/movement_sparsity/importance_threshold": -0.00029509797891903895, "compression/movement_sparsity/linear_layer_sparsity": 0.8976710622010373, "compression/movement_sparsity/model_sparsity": 0.8668332996552353, "compression_loss": 101.78473663330078, "distillation_loss": 3.6692802906036377, "epoch": 3.96, "learning_rate": 3.356814126044896e-05, "loss": 106.2184, "step": 4682, "task_loss": 1.7455674409866333 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9579678116588514, "compression/movement_sparsity/importance_threshold": -0.0002943823010285113, "compression/movement_sparsity/linear_layer_sparsity": 0.8977285247648745, "compression/movement_sparsity/model_sparsity": 0.8668887882032291, "compression_loss": 101.79500579833984, "distillation_loss": 4.911920547485352, "epoch": 3.96, "learning_rate": 3.356344510190664e-05, "loss": 106.4521, "step": 4683, "task_loss": 2.149130344390869 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9580698314888917, "compression/movement_sparsity/importance_threshold": -0.000293667781192569, "compression/movement_sparsity/linear_layer_sparsity": 0.8977314342617776, "compression/movement_sparsity/model_sparsity": 0.8668915977499629, "compression_loss": 101.80519104003906, "distillation_loss": 4.967400550842285, "epoch": 3.96, "learning_rate": 3.355874894336433e-05, "loss": 106.0529, "step": 4684, "task_loss": 3.211292266845703 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9581716861046683, "compression/movement_sparsity/importance_threshold": -0.00029295441847351864, "compression/movement_sparsity/linear_layer_sparsity": 0.8977829705142999, "compression/movement_sparsity/model_sparsity": 0.866941363573667, "compression_loss": 101.8154296875, "distillation_loss": 2.9081835746765137, "epoch": 3.96, "learning_rate": 3.3554052784822016e-05, "loss": 105.4729, "step": 4685, "task_loss": 1.7217074632644653 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9582733756400666, "compression/movement_sparsity/importance_threshold": -0.00029224221193366144, "compression/movement_sparsity/linear_layer_sparsity": 0.8979159011351044, "compression/movement_sparsity/model_sparsity": 0.8670697276187045, "compression_loss": 101.82569122314453, "distillation_loss": 3.920755624771118, "epoch": 3.96, "learning_rate": 3.35493566262797e-05, "loss": 106.2553, "step": 4686, "task_loss": 2.587437152862549 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9583749002289718, "compression/movement_sparsity/importance_threshold": -0.0002915311606353013, "compression/movement_sparsity/linear_layer_sparsity": 0.898004855425668, "compression/movement_sparsity/model_sparsity": 0.8671556260557318, "compression_loss": 101.83583068847656, "distillation_loss": 4.951447010040283, "epoch": 3.96, "learning_rate": 3.3544660467737395e-05, "loss": 106.7086, "step": 4687, "task_loss": 3.45975399017334 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9584762600052686, "compression/movement_sparsity/importance_threshold": -0.00029082126364074565, "compression/movement_sparsity/linear_layer_sparsity": 0.8980404729143963, "compression/movement_sparsity/model_sparsity": 0.8671900199741501, "compression_loss": 101.84609985351562, "distillation_loss": 4.715553283691406, "epoch": 3.96, "learning_rate": 3.353996430919508e-05, "loss": 106.6261, "step": 4688, "task_loss": 2.714484214782715 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9585774551028428, "compression/movement_sparsity/importance_threshold": -0.00029011252001229313, "compression/movement_sparsity/linear_layer_sparsity": 0.8980800253784444, "compression/movement_sparsity/model_sparsity": 0.8672282136893806, "compression_loss": 101.85623168945312, "distillation_loss": 3.2695062160491943, "epoch": 3.96, "learning_rate": 3.353526815065277e-05, "loss": 106.0076, "step": 4689, "task_loss": 1.284071922302246 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9586784856555789, "compression/movement_sparsity/importance_threshold": -0.00028940492881225285, "compression/movement_sparsity/linear_layer_sparsity": 0.8981603704199749, "compression/movement_sparsity/model_sparsity": 0.8673057986315642, "compression_loss": 101.86640167236328, "distillation_loss": 3.63126540184021, "epoch": 3.96, "learning_rate": 3.3530571992110454e-05, "loss": 105.3049, "step": 4690, "task_loss": 2.9905450344085693 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9587793517973624, "compression/movement_sparsity/importance_threshold": -0.0002886984891029261, "compression/movement_sparsity/linear_layer_sparsity": 0.8981480289064718, "compression/movement_sparsity/model_sparsity": 0.8672938810870169, "compression_loss": 101.87654876708984, "distillation_loss": 2.702547073364258, "epoch": 3.96, "learning_rate": 3.352587583356814e-05, "loss": 106.0171, "step": 4691, "task_loss": 2.364891767501831 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9588800536620783, "compression/movement_sparsity/importance_threshold": -0.0002879931999466177, "compression/movement_sparsity/linear_layer_sparsity": 0.8982173798654419, "compression/movement_sparsity/model_sparsity": 0.867360849627198, "compression_loss": 101.88666534423828, "distillation_loss": 5.127796173095703, "epoch": 3.97, "learning_rate": 3.3521179675025834e-05, "loss": 106.386, "step": 4692, "task_loss": 3.3984546661376953 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9589805913836119, "compression/movement_sparsity/importance_threshold": -0.0002872890604056306, "compression/movement_sparsity/linear_layer_sparsity": 0.8982264899295158, "compression/movement_sparsity/model_sparsity": 0.8673696467325449, "compression_loss": 101.89674377441406, "distillation_loss": 4.128251075744629, "epoch": 3.97, "learning_rate": 3.3516483516483513e-05, "loss": 105.8835, "step": 4693, "task_loss": 1.906435251235962 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9590809650958482, "compression/movement_sparsity/importance_threshold": -0.00028658606954226964, "compression/movement_sparsity/linear_layer_sparsity": 0.8982572900545192, "compression/movement_sparsity/model_sparsity": 0.8673993887785022, "compression_loss": 101.90684509277344, "distillation_loss": 4.028168678283691, "epoch": 3.97, "learning_rate": 3.3511787357941207e-05, "loss": 106.0872, "step": 4694, "task_loss": 1.67355215549469 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9591811749326724, "compression/movement_sparsity/importance_threshold": -0.0002858842264188378, "compression/movement_sparsity/linear_layer_sparsity": 0.8982984045845277, "compression/movement_sparsity/model_sparsity": 0.8674390908979218, "compression_loss": 101.9168930053711, "distillation_loss": 3.3944010734558105, "epoch": 3.97, "learning_rate": 3.350709119939889e-05, "loss": 105.5808, "step": 4695, "task_loss": 2.459972620010376 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9592812210279696, "compression/movement_sparsity/importance_threshold": -0.00028518353009764075, "compression/movement_sparsity/linear_layer_sparsity": 0.8984599412834907, "compression/movement_sparsity/model_sparsity": 0.8675950783143305, "compression_loss": 101.92695617675781, "distillation_loss": 3.7825629711151123, "epoch": 3.97, "learning_rate": 3.350239504085658e-05, "loss": 106.3229, "step": 4696, "task_loss": 1.5147826671600342 }, { "compression/movement_sparsity/importance_regularization_factor": 0.959381103515625, "compression/movement_sparsity/importance_threshold": -0.0002844839796409815, "compression/movement_sparsity/linear_layer_sparsity": 0.8984181351517594, "compression/movement_sparsity/model_sparsity": 0.8675547083518348, "compression_loss": 101.93692016601562, "distillation_loss": 5.284494400024414, "epoch": 3.97, "learning_rate": 3.349769888231427e-05, "loss": 106.4689, "step": 4697, "task_loss": 3.5198774337768555 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9594808225295237, "compression/movement_sparsity/importance_threshold": -0.00028378557411116393, "compression/movement_sparsity/linear_layer_sparsity": 0.8985062309022531, "compression/movement_sparsity/model_sparsity": 0.8676397777422848, "compression_loss": 101.94683837890625, "distillation_loss": 5.379023551940918, "epoch": 3.97, "learning_rate": 3.349300272377195e-05, "loss": 106.342, "step": 4698, "task_loss": 2.7044739723205566 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9595803782035508, "compression/movement_sparsity/importance_threshold": -0.00028308831257049284, "compression/movement_sparsity/linear_layer_sparsity": 0.8985478343231347, "compression/movement_sparsity/model_sparsity": 0.867679951957672, "compression_loss": 101.95682525634766, "distillation_loss": 5.488298416137695, "epoch": 3.97, "learning_rate": 3.3488306565229645e-05, "loss": 106.6213, "step": 4699, "task_loss": 3.1338329315185547 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9596797706715915, "compression/movement_sparsity/importance_threshold": -0.0002823921940812704, "compression/movement_sparsity/linear_layer_sparsity": 0.8985507080475349, "compression/movement_sparsity/model_sparsity": 0.8676827269607985, "compression_loss": 101.96670532226562, "distillation_loss": 4.243288993835449, "epoch": 3.97, "learning_rate": 3.348361040668733e-05, "loss": 106.7234, "step": 4700, "task_loss": 1.9602845907211304 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9597790000675309, "compression/movement_sparsity/importance_threshold": -0.00028169721770580306, "compression/movement_sparsity/linear_layer_sparsity": 0.8986428341666896, "compression/movement_sparsity/model_sparsity": 0.8677716882643471, "compression_loss": 101.97666931152344, "distillation_loss": 3.7543842792510986, "epoch": 3.97, "learning_rate": 3.3478914248145025e-05, "loss": 106.0184, "step": 4701, "task_loss": 2.676030158996582 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9598780665252543, "compression/movement_sparsity/importance_threshold": -0.00028100338250639217, "compression/movement_sparsity/linear_layer_sparsity": 0.8987100983963234, "compression/movement_sparsity/model_sparsity": 0.8678366417607641, "compression_loss": 101.98658752441406, "distillation_loss": 5.363182067871094, "epoch": 3.97, "learning_rate": 3.3474218089602704e-05, "loss": 106.677, "step": 4702, "task_loss": 3.5991029739379883 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9599769701786466, "compression/movement_sparsity/importance_threshold": -0.00028031068754534334, "compression/movement_sparsity/linear_layer_sparsity": 0.8988445076139149, "compression/movement_sparsity/model_sparsity": 0.8679664336082402, "compression_loss": 101.9964599609375, "distillation_loss": 4.069483757019043, "epoch": 3.97, "learning_rate": 3.346952193106039e-05, "loss": 106.7822, "step": 4703, "task_loss": 2.5340805053710938 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9600757111615931, "compression/movement_sparsity/importance_threshold": -0.00027961913188496047, "compression/movement_sparsity/linear_layer_sparsity": 0.8989302066067139, "compression/movement_sparsity/model_sparsity": 0.8680491885769955, "compression_loss": 102.00642395019531, "distillation_loss": 3.8172523975372314, "epoch": 3.98, "learning_rate": 3.3464825772518084e-05, "loss": 107.1222, "step": 4704, "task_loss": 2.0957939624786377 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9601742896079788, "compression/movement_sparsity/importance_threshold": -0.00027892871458754746, "compression/movement_sparsity/linear_layer_sparsity": 0.8989639758494586, "compression/movement_sparsity/model_sparsity": 0.8680817977423657, "compression_loss": 102.01628112792969, "distillation_loss": 3.121922492980957, "epoch": 3.98, "learning_rate": 3.346012961397577e-05, "loss": 106.5005, "step": 4705, "task_loss": 2.029348373413086 }, { "compression/movement_sparsity/importance_regularization_factor": 0.960272705651689, "compression/movement_sparsity/importance_threshold": -0.0002782394347154082, "compression/movement_sparsity/linear_layer_sparsity": 0.8990171337887793, "compression/movement_sparsity/model_sparsity": 0.8681331295429378, "compression_loss": 102.0261459350586, "distillation_loss": 4.173529624938965, "epoch": 3.98, "learning_rate": 3.3455433455433456e-05, "loss": 106.5012, "step": 4706, "task_loss": 2.138794422149658 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9603709594266087, "compression/movement_sparsity/importance_threshold": -0.0002775512913308475, "compression/movement_sparsity/linear_layer_sparsity": 0.8989910794824949, "compression/movement_sparsity/model_sparsity": 0.8681079702822266, "compression_loss": 102.03602600097656, "distillation_loss": 4.12360143661499, "epoch": 3.98, "learning_rate": 3.345073729689114e-05, "loss": 106.5132, "step": 4707, "task_loss": 2.365708589553833 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9604690510666232, "compression/movement_sparsity/importance_threshold": -0.0002768642834961666, "compression/movement_sparsity/linear_layer_sparsity": 0.8989867510096431, "compression/movement_sparsity/model_sparsity": 0.8681037905057333, "compression_loss": 102.04580688476562, "distillation_loss": 3.2090349197387695, "epoch": 3.98, "learning_rate": 3.3446041138348836e-05, "loss": 106.082, "step": 4708, "task_loss": 1.8742254972457886 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9605669807056174, "compression/movement_sparsity/importance_threshold": -0.00027617841027367286, "compression/movement_sparsity/linear_layer_sparsity": 0.899009645411504, "compression/movement_sparsity/model_sparsity": 0.8681258984144588, "compression_loss": 102.05554962158203, "distillation_loss": 3.7272791862487793, "epoch": 3.98, "learning_rate": 3.344134497980652e-05, "loss": 105.9248, "step": 4709, "task_loss": 1.3674213886260986 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9606647484774767, "compression/movement_sparsity/importance_threshold": -0.00027549367072566763, "compression/movement_sparsity/linear_layer_sparsity": 0.8990709237089847, "compression/movement_sparsity/model_sparsity": 0.8681850716139069, "compression_loss": 102.06529998779297, "distillation_loss": 4.138559341430664, "epoch": 3.98, "learning_rate": 3.34366488212642e-05, "loss": 106.6362, "step": 4710, "task_loss": 2.0805158615112305 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9607623545160862, "compression/movement_sparsity/importance_threshold": -0.0002748100639144565, "compression/movement_sparsity/linear_layer_sparsity": 0.899139583066232, "compression/movement_sparsity/model_sparsity": 0.8682513723110119, "compression_loss": 102.07511138916016, "distillation_loss": 5.992580413818359, "epoch": 3.98, "learning_rate": 3.3431952662721895e-05, "loss": 107.424, "step": 4711, "task_loss": 2.6298882961273193 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9608597989553309, "compression/movement_sparsity/importance_threshold": -0.00027412758890234257, "compression/movement_sparsity/linear_layer_sparsity": 0.8991688330494427, "compression/movement_sparsity/model_sparsity": 0.8682796174673159, "compression_loss": 102.08480072021484, "distillation_loss": 3.034358024597168, "epoch": 3.98, "learning_rate": 3.342725650417958e-05, "loss": 105.9314, "step": 4712, "task_loss": 2.4491961002349854 }, { "compression/movement_sparsity/importance_regularization_factor": 0.960957081929096, "compression/movement_sparsity/importance_threshold": -0.0002734462447516305, "compression/movement_sparsity/linear_layer_sparsity": 0.899139547293729, "compression/movement_sparsity/model_sparsity": 0.8682513377674045, "compression_loss": 102.09453582763672, "distillation_loss": 5.255185127258301, "epoch": 3.98, "learning_rate": 3.3422560345637274e-05, "loss": 106.4459, "step": 4713, "task_loss": 2.728890895843506 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9610542035712667, "compression/movement_sparsity/importance_threshold": -0.0002727660305246243, "compression/movement_sparsity/linear_layer_sparsity": 0.8992183541176345, "compression/movement_sparsity/model_sparsity": 0.8683274373344706, "compression_loss": 102.1041259765625, "distillation_loss": 5.061298370361328, "epoch": 3.98, "learning_rate": 3.341786418709496e-05, "loss": 106.3038, "step": 4714, "task_loss": 2.866028308868408 }, { "compression/movement_sparsity/importance_regularization_factor": 0.961151164015728, "compression/movement_sparsity/importance_threshold": -0.0002720869452836278, "compression/movement_sparsity/linear_layer_sparsity": 0.8992474729350013, "compression/movement_sparsity/model_sparsity": 0.8683555558308809, "compression_loss": 102.11378479003906, "distillation_loss": 4.558058261871338, "epoch": 3.99, "learning_rate": 3.341316802855265e-05, "loss": 106.215, "step": 4715, "task_loss": 2.8137505054473877 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9612479633963652, "compression/movement_sparsity/importance_threshold": -0.000271408988090944, "compression/movement_sparsity/linear_layer_sparsity": 0.8993604424991835, "compression/movement_sparsity/model_sparsity": 0.8684646445429984, "compression_loss": 102.12342834472656, "distillation_loss": 4.739848613739014, "epoch": 3.99, "learning_rate": 3.340847187001033e-05, "loss": 106.535, "step": 4716, "task_loss": 1.9558812379837036 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9613446018470634, "compression/movement_sparsity/importance_threshold": -0.0002707321580088778, "compression/movement_sparsity/linear_layer_sparsity": 0.8993654744979258, "compression/movement_sparsity/model_sparsity": 0.8684695036771036, "compression_loss": 102.13296508789062, "distillation_loss": 5.490961074829102, "epoch": 3.99, "learning_rate": 3.340377571146802e-05, "loss": 107.3266, "step": 4717, "task_loss": 2.1332499980926514 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9614410795017077, "compression/movement_sparsity/importance_threshold": -0.00027005645409973293, "compression/movement_sparsity/linear_layer_sparsity": 0.8993622311243288, "compression/movement_sparsity/model_sparsity": 0.8684663717233676, "compression_loss": 102.14260864257812, "distillation_loss": 5.744338035583496, "epoch": 3.99, "learning_rate": 3.339907955292571e-05, "loss": 106.5651, "step": 4718, "task_loss": 3.526001214981079 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9615373964941832, "compression/movement_sparsity/importance_threshold": -0.0002693818754258134, "compression/movement_sparsity/linear_layer_sparsity": 0.8995838417798413, "compression/movement_sparsity/model_sparsity": 0.8686803693711092, "compression_loss": 102.15216827392578, "distillation_loss": 4.061690807342529, "epoch": 3.99, "learning_rate": 3.339438339438339e-05, "loss": 107.264, "step": 4719, "task_loss": 3.2331526279449463 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9616335529583752, "compression/movement_sparsity/importance_threshold": -0.00026870842104942394, "compression/movement_sparsity/linear_layer_sparsity": 0.8996112196687333, "compression/movement_sparsity/model_sparsity": 0.8687068067452933, "compression_loss": 102.16175079345703, "distillation_loss": 5.684405326843262, "epoch": 3.99, "learning_rate": 3.3389687235841085e-05, "loss": 107.0543, "step": 4720, "task_loss": 3.286560535430908 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9617295490281687, "compression/movement_sparsity/importance_threshold": -0.0002680360900328676, "compression/movement_sparsity/linear_layer_sparsity": 0.8997705623208513, "compression/movement_sparsity/model_sparsity": 0.8688606754871158, "compression_loss": 102.17129516601562, "distillation_loss": 4.273404121398926, "epoch": 3.99, "learning_rate": 3.338499107729877e-05, "loss": 106.9224, "step": 4721, "task_loss": 1.763748288154602 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9618253848374488, "compression/movement_sparsity/importance_threshold": -0.00026736488143844824, "compression/movement_sparsity/linear_layer_sparsity": 0.8998249842219413, "compression/movement_sparsity/model_sparsity": 0.8689132278284821, "compression_loss": 102.18086242675781, "distillation_loss": 4.594601631164551, "epoch": 3.99, "learning_rate": 3.338029491875646e-05, "loss": 106.3894, "step": 4722, "task_loss": 2.9253575801849365 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9619210605201008, "compression/movement_sparsity/importance_threshold": -0.0002666947943284698, "compression/movement_sparsity/linear_layer_sparsity": 0.8998434905301123, "compression/movement_sparsity/model_sparsity": 0.8689310983880352, "compression_loss": 102.19035339355469, "distillation_loss": 4.620210647583008, "epoch": 3.99, "learning_rate": 3.3375598760214144e-05, "loss": 106.6182, "step": 4723, "task_loss": 2.4557876586914062 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9620165762100098, "compression/movement_sparsity/importance_threshold": -0.000266025827765237, "compression/movement_sparsity/linear_layer_sparsity": 0.8999446790166702, "compression/movement_sparsity/model_sparsity": 0.8690288107387877, "compression_loss": 102.19989776611328, "distillation_loss": 5.3723464012146, "epoch": 3.99, "learning_rate": 3.337090260167183e-05, "loss": 106.8429, "step": 4724, "task_loss": 3.2323360443115234 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9621119320410608, "compression/movement_sparsity/importance_threshold": -0.0002653579808110538, "compression/movement_sparsity/linear_layer_sparsity": 0.9000556930173601, "compression/movement_sparsity/model_sparsity": 0.8691360110670349, "compression_loss": 102.20938873291016, "distillation_loss": 4.161534309387207, "epoch": 3.99, "learning_rate": 3.3366206443129524e-05, "loss": 107.314, "step": 4725, "task_loss": 2.3385519981384277 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9622071281471392, "compression/movement_sparsity/importance_threshold": -0.0002646912525282241, "compression/movement_sparsity/linear_layer_sparsity": 0.9001602321950237, "compression/movement_sparsity/model_sparsity": 0.8692369590023457, "compression_loss": 102.2188720703125, "distillation_loss": 4.973778247833252, "epoch": 3.99, "learning_rate": 3.336151028458721e-05, "loss": 107.117, "step": 4726, "task_loss": 2.5603156089782715 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9623021646621298, "compression/movement_sparsity/importance_threshold": -0.00026402564197905085, "compression/movement_sparsity/linear_layer_sparsity": 0.9001480695440351, "compression/movement_sparsity/model_sparsity": 0.8692252141758352, "compression_loss": 102.22834777832031, "distillation_loss": 4.6997151374816895, "epoch": 4.0, "learning_rate": 3.3356814126044896e-05, "loss": 107.3776, "step": 4727, "task_loss": 2.01159930229187 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9623970417199179, "compression/movement_sparsity/importance_threshold": -0.0002633611482258406, "compression/movement_sparsity/linear_layer_sparsity": 0.9001439080095301, "compression/movement_sparsity/model_sparsity": 0.8692211956028428, "compression_loss": 102.23784637451172, "distillation_loss": 4.433721542358398, "epoch": 4.0, "learning_rate": 3.335211796750258e-05, "loss": 106.6445, "step": 4728, "task_loss": 3.8411970138549805 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9624917594543888, "compression/movement_sparsity/importance_threshold": -0.0002626977703308938, "compression/movement_sparsity/linear_layer_sparsity": 0.9002022648859401, "compression/movement_sparsity/model_sparsity": 0.8692775477410214, "compression_loss": 102.24736022949219, "distillation_loss": 4.056196689605713, "epoch": 4.0, "learning_rate": 3.334742180896027e-05, "loss": 106.2125, "step": 4729, "task_loss": 1.5910521745681763 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9625863179994274, "compression/movement_sparsity/importance_threshold": -0.0002620355073565169, "compression/movement_sparsity/linear_layer_sparsity": 0.9002586304263548, "compression/movement_sparsity/model_sparsity": 0.8693319769517222, "compression_loss": 102.25675964355469, "distillation_loss": 5.15647029876709, "epoch": 4.0, "learning_rate": 3.334272565041796e-05, "loss": 107.6808, "step": 4730, "task_loss": 4.085256099700928 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9626807174889191, "compression/movement_sparsity/importance_threshold": -0.0002613743583650121, "compression/movement_sparsity/linear_layer_sparsity": 0.9002695768122445, "compression/movement_sparsity/model_sparsity": 0.8693425472955816, "compression_loss": 102.26620483398438, "distillation_loss": 3.7740845680236816, "epoch": 4.0, "learning_rate": 3.333802949187565e-05, "loss": 106.1293, "step": 4731, "task_loss": 2.223395586013794 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9627749580567488, "compression/movement_sparsity/importance_threshold": -0.00026071432241868506, "compression/movement_sparsity/linear_layer_sparsity": 0.9002622672974837, "compression/movement_sparsity/model_sparsity": 0.8693354888851395, "compression_loss": 102.27556610107422, "distillation_loss": 3.833007335662842, "epoch": 4.0, "learning_rate": 3.3333333333333335e-05, "loss": 106.8324, "step": 4732, "task_loss": 2.9822402000427246 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9629629629629629, "compression/movement_sparsity/importance_threshold": -0.0002593975859107797, "compression/movement_sparsity/linear_layer_sparsity": 0.9003952217666237, "compression/movement_sparsity/model_sparsity": 0.8694638759592487, "compression_loss": 102.29492950439453, "distillation_loss": 2.872431755065918, "epoch": 4.0, "learning_rate": 3.332863717479102e-05, "loss": 186.7031, "step": 4733, "task_loss": 1.6889793872833252 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9630567275691178, "compression/movement_sparsity/importance_threshold": -0.0002587408834738075, "compression/movement_sparsity/linear_layer_sparsity": 0.9005190542475221, "compression/movement_sparsity/model_sparsity": 0.869583454413475, "compression_loss": 102.30427551269531, "distillation_loss": 5.131381988525391, "epoch": 4.0, "learning_rate": 3.332394101624871e-05, "loss": 106.904, "step": 4734, "task_loss": 3.1679792404174805 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9631503337891512, "compression/movement_sparsity/importance_threshold": -0.00025808529033122946, "compression/movement_sparsity/linear_layer_sparsity": 0.9005311692018401, "compression/movement_sparsity/model_sparsity": 0.8695951531818422, "compression_loss": 102.31358337402344, "distillation_loss": 4.491264343261719, "epoch": 4.0, "learning_rate": 3.33192448577064e-05, "loss": 106.0944, "step": 4735, "task_loss": 3.6429214477539062 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9632437817569484, "compression/movement_sparsity/importance_threshold": -0.00025743080554534864, "compression/movement_sparsity/linear_layer_sparsity": 0.9005356049922006, "compression/movement_sparsity/model_sparsity": 0.8695994365891577, "compression_loss": 102.32286834716797, "distillation_loss": 5.38408899307251, "epoch": 4.0, "learning_rate": 3.331454869916408e-05, "loss": 106.8995, "step": 4736, "task_loss": 4.077313423156738 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9633370716063946, "compression/movement_sparsity/importance_threshold": -0.0002567774281784672, "compression/movement_sparsity/linear_layer_sparsity": 0.9005687184057254, "compression/movement_sparsity/model_sparsity": 0.8696314124550591, "compression_loss": 102.3321533203125, "distillation_loss": 4.414595603942871, "epoch": 4.0, "learning_rate": 3.3309852540621773e-05, "loss": 106.334, "step": 4737, "task_loss": 2.7615694999694824 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9634302034713748, "compression/movement_sparsity/importance_threshold": -0.00025612515729289253, "compression/movement_sparsity/linear_layer_sparsity": 0.9006416466149864, "compression/movement_sparsity/model_sparsity": 0.8697018353559786, "compression_loss": 102.34141540527344, "distillation_loss": 3.2609522342681885, "epoch": 4.01, "learning_rate": 3.330515638207946e-05, "loss": 105.8877, "step": 4738, "task_loss": 1.8966424465179443 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9635231774857742, "compression/movement_sparsity/importance_threshold": -0.0002554739919509259, "compression/movement_sparsity/linear_layer_sparsity": 0.9006663892628308, "compression/movement_sparsity/model_sparsity": 0.8697257280177523, "compression_loss": 102.3507080078125, "distillation_loss": 4.083732604980469, "epoch": 4.01, "learning_rate": 3.330046022353715e-05, "loss": 106.7867, "step": 4739, "task_loss": 2.075038433074951 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9636159937834781, "compression/movement_sparsity/importance_threshold": -0.0002548239312148721, "compression/movement_sparsity/linear_layer_sparsity": 0.9006817237424105, "compression/movement_sparsity/model_sparsity": 0.869740535710784, "compression_loss": 102.35995483398438, "distillation_loss": 3.284346580505371, "epoch": 4.01, "learning_rate": 3.329576406499483e-05, "loss": 106.0452, "step": 4740, "task_loss": 1.9546148777008057 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9637086524983712, "compression/movement_sparsity/importance_threshold": -0.00025417497414703585, "compression/movement_sparsity/linear_layer_sparsity": 0.9007792634336719, "compression/movement_sparsity/model_sparsity": 0.8698347246135834, "compression_loss": 102.3691635131836, "distillation_loss": 4.348479747772217, "epoch": 4.01, "learning_rate": 3.3291067906452526e-05, "loss": 106.5826, "step": 4741, "task_loss": 1.425667643547058 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9638011537643392, "compression/movement_sparsity/importance_threshold": -0.0002535271198097202, "compression/movement_sparsity/linear_layer_sparsity": 0.9009048726155481, "compression/movement_sparsity/model_sparsity": 0.8699560187336431, "compression_loss": 102.37838745117188, "distillation_loss": 6.096728324890137, "epoch": 4.01, "learning_rate": 3.328637174791021e-05, "loss": 107.232, "step": 4742, "task_loss": 3.0057220458984375 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9638934977152669, "compression/movement_sparsity/importance_threshold": -0.0002528803672652291, "compression/movement_sparsity/linear_layer_sparsity": 0.9009324055186193, "compression/movement_sparsity/model_sparsity": 0.8699826057967927, "compression_loss": 102.3875961303711, "distillation_loss": 2.8381001949310303, "epoch": 4.01, "learning_rate": 3.32816755893679e-05, "loss": 105.6779, "step": 4743, "task_loss": 1.089234709739685 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9639856844850394, "compression/movement_sparsity/importance_threshold": -0.0002522347155758673, "compression/movement_sparsity/linear_layer_sparsity": 0.9009738658494891, "compression/movement_sparsity/model_sparsity": 0.8700226418377504, "compression_loss": 102.39678955078125, "distillation_loss": 3.860442876815796, "epoch": 4.01, "learning_rate": 3.327697943082559e-05, "loss": 106.7943, "step": 4744, "task_loss": 2.458700656890869 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9640777142075421, "compression/movement_sparsity/importance_threshold": -0.0002515901638039386, "compression/movement_sparsity/linear_layer_sparsity": 0.9010083028456215, "compression/movement_sparsity/model_sparsity": 0.870055895817125, "compression_loss": 102.40593719482422, "distillation_loss": 3.7795801162719727, "epoch": 4.01, "learning_rate": 3.327228327228327e-05, "loss": 106.849, "step": 4745, "task_loss": 2.152385711669922 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9641695870166599, "compression/movement_sparsity/importance_threshold": -0.00025094671101174616, "compression/movement_sparsity/linear_layer_sparsity": 0.9011130208857997, "compression/movement_sparsity/model_sparsity": 0.8701570164704726, "compression_loss": 102.41505432128906, "distillation_loss": 3.747814655303955, "epoch": 4.01, "learning_rate": 3.3267587113740964e-05, "loss": 107.1793, "step": 4746, "task_loss": 2.3008198738098145 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9642613030462781, "compression/movement_sparsity/importance_threshold": -0.0002503043562615947, "compression/movement_sparsity/linear_layer_sparsity": 0.9011355456384638, "compression/movement_sparsity/model_sparsity": 0.8701787674285885, "compression_loss": 102.42414093017578, "distillation_loss": 4.956059455871582, "epoch": 4.01, "learning_rate": 3.326289095519865e-05, "loss": 107.2513, "step": 4747, "task_loss": 2.8391451835632324 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9643528624302817, "compression/movement_sparsity/importance_threshold": -0.0002496630986157881, "compression/movement_sparsity/linear_layer_sparsity": 0.901165892645097, "compression/movement_sparsity/model_sparsity": 0.8702080719221856, "compression_loss": 102.43321990966797, "distillation_loss": 5.313861846923828, "epoch": 4.01, "learning_rate": 3.325819479665634e-05, "loss": 107.2045, "step": 4748, "task_loss": 2.8192031383514404 }, { "compression/movement_sparsity/importance_regularization_factor": 0.964444265302556, "compression/movement_sparsity/importance_threshold": -0.00024902293713663023, "compression/movement_sparsity/linear_layer_sparsity": 0.9011905160512651, "compression/movement_sparsity/model_sparsity": 0.8702318494386013, "compression_loss": 102.44236755371094, "distillation_loss": 5.839061260223389, "epoch": 4.01, "learning_rate": 3.325349863811402e-05, "loss": 106.8445, "step": 4749, "task_loss": 2.828413724899292 }, { "compression/movement_sparsity/importance_regularization_factor": 0.964535511796986, "compression/movement_sparsity/importance_threshold": -0.00024838387088642506, "compression/movement_sparsity/linear_layer_sparsity": 0.901203847270682, "compression/movement_sparsity/model_sparsity": 0.8702447226896196, "compression_loss": 102.45146179199219, "distillation_loss": 5.051539421081543, "epoch": 4.02, "learning_rate": 3.324880247957171e-05, "loss": 107.196, "step": 4750, "task_loss": 2.9877536296844482 }, { "compression/movement_sparsity/importance_regularization_factor": 0.964626602047457, "compression/movement_sparsity/importance_threshold": -0.00024774589892747646, "compression/movement_sparsity/linear_layer_sparsity": 0.9011523110181598, "compression/movement_sparsity/model_sparsity": 0.8701949568659156, "compression_loss": 102.4605484008789, "distillation_loss": 5.509265422821045, "epoch": 4.02, "learning_rate": 3.32441063210294e-05, "loss": 106.9123, "step": 4751, "task_loss": 2.274599075317383 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9647175361878539, "compression/movement_sparsity/importance_threshold": -0.0002471090203220883, "compression/movement_sparsity/linear_layer_sparsity": 0.9011253266267999, "compression/movement_sparsity/model_sparsity": 0.8701688994714125, "compression_loss": 102.46958923339844, "distillation_loss": 4.1670098304748535, "epoch": 4.02, "learning_rate": 3.323941016248709e-05, "loss": 106.8653, "step": 4752, "task_loss": 2.10685396194458 }, { "compression/movement_sparsity/importance_regularization_factor": 0.964808314352062, "compression/movement_sparsity/importance_threshold": -0.0002464732341325654, "compression/movement_sparsity/linear_layer_sparsity": 0.9011417700539698, "compression/movement_sparsity/model_sparsity": 0.8701847780162733, "compression_loss": 102.47862243652344, "distillation_loss": 3.422401189804077, "epoch": 4.02, "learning_rate": 3.3234714003944775e-05, "loss": 106.9227, "step": 4753, "task_loss": 1.9792814254760742 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9648989366739665, "compression/movement_sparsity/importance_threshold": -0.0002458385394212108, "compression/movement_sparsity/linear_layer_sparsity": 0.9011828845839782, "compression/movement_sparsity/model_sparsity": 0.8702244801356929, "compression_loss": 102.4876480102539, "distillation_loss": 3.653017044067383, "epoch": 4.02, "learning_rate": 3.323001784540246e-05, "loss": 106.6764, "step": 4754, "task_loss": 1.923354148864746 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9649894032874524, "compression/movement_sparsity/importance_threshold": -0.0002452049352503292, "compression/movement_sparsity/linear_layer_sparsity": 0.9012377238309355, "compression/movement_sparsity/model_sparsity": 0.8702774354858119, "compression_loss": 102.49662017822266, "distillation_loss": 6.276140213012695, "epoch": 4.02, "learning_rate": 3.322532168686015e-05, "loss": 106.7666, "step": 4755, "task_loss": 3.2657864093780518 }, { "compression/movement_sparsity/importance_regularization_factor": 0.965079714326405, "compression/movement_sparsity/importance_threshold": -0.0002445724206822236, "compression/movement_sparsity/linear_layer_sparsity": 0.9012958899206632, "compression/movement_sparsity/model_sparsity": 0.8703336033914176, "compression_loss": 102.50564575195312, "distillation_loss": 4.390575885772705, "epoch": 4.02, "learning_rate": 3.322062552831784e-05, "loss": 106.6049, "step": 4756, "task_loss": 2.2503719329833984 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9651698699247092, "compression/movement_sparsity/importance_threshold": -0.00024394099477919977, "compression/movement_sparsity/linear_layer_sparsity": 0.9014229896234939, "compression/movement_sparsity/model_sparsity": 0.8704563368284517, "compression_loss": 102.5146484375, "distillation_loss": 4.468634605407715, "epoch": 4.02, "learning_rate": 3.321592936977552e-05, "loss": 107.1529, "step": 4757, "task_loss": 2.3377504348754883 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9652598702162504, "compression/movement_sparsity/importance_threshold": -0.0002433106566035589, "compression/movement_sparsity/linear_layer_sparsity": 0.9014323977917587, "compression/movement_sparsity/model_sparsity": 0.8704654217971936, "compression_loss": 102.52351379394531, "distillation_loss": 4.082657814025879, "epoch": 4.02, "learning_rate": 3.3211233211233214e-05, "loss": 107.2328, "step": 4758, "task_loss": 1.8868112564086914 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9653497153349135, "compression/movement_sparsity/importance_threshold": -0.00024268140521760753, "compression/movement_sparsity/linear_layer_sparsity": 0.901421153301678, "compression/movement_sparsity/model_sparsity": 0.8704545635899393, "compression_loss": 102.53240966796875, "distillation_loss": 2.7098288536071777, "epoch": 4.02, "learning_rate": 3.32065370526909e-05, "loss": 106.782, "step": 4759, "task_loss": 1.1418133974075317 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9654394054145838, "compression/movement_sparsity/importance_threshold": -0.00024205323968364868, "compression/movement_sparsity/linear_layer_sparsity": 0.9014085613806545, "compression/movement_sparsity/model_sparsity": 0.8704424042401403, "compression_loss": 102.54130554199219, "distillation_loss": 3.3141701221466064, "epoch": 4.02, "learning_rate": 3.3201840894148586e-05, "loss": 106.5259, "step": 4760, "task_loss": 1.3519169092178345 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9655289405891465, "compression/movement_sparsity/importance_threshold": -0.00024142615906398626, "compression/movement_sparsity/linear_layer_sparsity": 0.9014684683988571, "compression/movement_sparsity/model_sparsity": 0.8705002532679721, "compression_loss": 102.55020141601562, "distillation_loss": 4.16291618347168, "epoch": 4.02, "learning_rate": 3.319714473560628e-05, "loss": 107.1851, "step": 4761, "task_loss": 2.811535120010376 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9656183209924866, "compression/movement_sparsity/importance_threshold": -0.00024080016242092415, "compression/movement_sparsity/linear_layer_sparsity": 0.901521423627328, "compression/movement_sparsity/model_sparsity": 0.8705513893214356, "compression_loss": 102.55904388427734, "distillation_loss": 6.311136722564697, "epoch": 4.03, "learning_rate": 3.319244857706396e-05, "loss": 108.194, "step": 4762, "task_loss": 3.2755727767944336 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9657075467584892, "compression/movement_sparsity/importance_threshold": -0.00024017524881676713, "compression/movement_sparsity/linear_layer_sparsity": 0.901529508212985, "compression/movement_sparsity/model_sparsity": 0.8705591961767043, "compression_loss": 102.56788635253906, "distillation_loss": 4.883666038513184, "epoch": 4.03, "learning_rate": 3.318775241852165e-05, "loss": 107.0493, "step": 4763, "task_loss": 2.172210454940796 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9657966180210396, "compression/movement_sparsity/importance_threshold": -0.00023955141731381822, "compression/movement_sparsity/linear_layer_sparsity": 0.9015997415603604, "compression/movement_sparsity/model_sparsity": 0.8706270167925342, "compression_loss": 102.57667541503906, "distillation_loss": 6.006497383117676, "epoch": 4.03, "learning_rate": 3.318305625997934e-05, "loss": 108.395, "step": 4764, "task_loss": 2.446901321411133 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9658855349140228, "compression/movement_sparsity/importance_threshold": -0.00023892866697438134, "compression/movement_sparsity/linear_layer_sparsity": 0.9016180570818491, "compression/movement_sparsity/model_sparsity": 0.8706447031195146, "compression_loss": 102.58549499511719, "distillation_loss": 5.098332405090332, "epoch": 4.03, "learning_rate": 3.317836010143703e-05, "loss": 107.4164, "step": 4765, "task_loss": 3.2266478538513184 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9659742975713241, "compression/movement_sparsity/importance_threshold": -0.00023830699686076037, "compression/movement_sparsity/linear_layer_sparsity": 0.9016944909963949, "compression/movement_sparsity/model_sparsity": 0.8707185112939576, "compression_loss": 102.59429931640625, "distillation_loss": 6.077312469482422, "epoch": 4.03, "learning_rate": 3.317366394289471e-05, "loss": 107.6598, "step": 4766, "task_loss": 2.8815295696258545 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9660629061268285, "compression/movement_sparsity/importance_threshold": -0.00023768640603526094, "compression/movement_sparsity/linear_layer_sparsity": 0.9017788544824187, "compression/movement_sparsity/model_sparsity": 0.870799976634704, "compression_loss": 102.60308837890625, "distillation_loss": 5.161858558654785, "epoch": 4.03, "learning_rate": 3.31689677843524e-05, "loss": 107.0557, "step": 4767, "task_loss": 2.3756675720214844 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9661513607144212, "compression/movement_sparsity/importance_threshold": -0.00023706689356018523, "compression/movement_sparsity/linear_layer_sparsity": 0.901816010188772, "compression/movement_sparsity/model_sparsity": 0.8708358559282398, "compression_loss": 102.61189270019531, "distillation_loss": 5.02192497253418, "epoch": 4.03, "learning_rate": 3.316427162581009e-05, "loss": 106.2566, "step": 4768, "task_loss": 2.2402851581573486 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9662396614679873, "compression/movement_sparsity/importance_threshold": -0.000236448458497838, "compression/movement_sparsity/linear_layer_sparsity": 0.9017842442061901, "compression/movement_sparsity/model_sparsity": 0.8708051812048831, "compression_loss": 102.62062072753906, "distillation_loss": 4.487470626831055, "epoch": 4.03, "learning_rate": 3.315957546726778e-05, "loss": 106.6382, "step": 4769, "task_loss": 1.8975750207901 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9663278085214121, "compression/movement_sparsity/importance_threshold": -0.00023583109991052315, "compression/movement_sparsity/linear_layer_sparsity": 0.901828351702275, "compression/movement_sparsity/model_sparsity": 0.8708477734727871, "compression_loss": 102.62935638427734, "distillation_loss": 5.582508563995361, "epoch": 4.03, "learning_rate": 3.315487930872546e-05, "loss": 108.1974, "step": 4770, "task_loss": 3.0257253646850586 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9664158020085805, "compression/movement_sparsity/importance_threshold": -0.00023521481686054457, "compression/movement_sparsity/linear_layer_sparsity": 0.9019323304440598, "compression/movement_sparsity/model_sparsity": 0.8709481802249155, "compression_loss": 102.6380615234375, "distillation_loss": 4.577693462371826, "epoch": 4.03, "learning_rate": 3.315018315018315e-05, "loss": 106.8369, "step": 4771, "task_loss": 1.8708293437957764 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9665036420633777, "compression/movement_sparsity/importance_threshold": -0.00023459960841020616, "compression/movement_sparsity/linear_layer_sparsity": 0.90196788831195, "compression/movement_sparsity/model_sparsity": 0.8709825165706548, "compression_loss": 102.64679718017578, "distillation_loss": 4.865073204040527, "epoch": 4.03, "learning_rate": 3.314548699164084e-05, "loss": 106.9721, "step": 4772, "task_loss": 1.9343392848968506 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9665913288196889, "compression/movement_sparsity/importance_threshold": -0.0002339854736218127, "compression/movement_sparsity/linear_layer_sparsity": 0.9020291427610954, "compression/movement_sparsity/model_sparsity": 0.8710416667410313, "compression_loss": 102.655517578125, "distillation_loss": 4.84503173828125, "epoch": 4.03, "learning_rate": 3.314079083309853e-05, "loss": 107.1568, "step": 4773, "task_loss": 2.7602505683898926 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9666788624113993, "compression/movement_sparsity/importance_threshold": -0.00023337241155766634, "compression/movement_sparsity/linear_layer_sparsity": 0.9021789222307695, "compression/movement_sparsity/model_sparsity": 0.8711863008251466, "compression_loss": 102.66424560546875, "distillation_loss": 6.017032146453857, "epoch": 4.04, "learning_rate": 3.3136094674556215e-05, "loss": 107.2348, "step": 4774, "task_loss": 2.9733078479766846 }, { "compression/movement_sparsity/importance_regularization_factor": 0.966766242972394, "compression/movement_sparsity/importance_threshold": -0.00023276042128007272, "compression/movement_sparsity/linear_layer_sparsity": 0.9022319370800785, "compression/movement_sparsity/model_sparsity": 0.8712374944512892, "compression_loss": 102.67283630371094, "distillation_loss": 3.5754313468933105, "epoch": 4.04, "learning_rate": 3.31313985160139e-05, "loss": 106.1666, "step": 4775, "task_loss": 1.8658751249313354 }, { "compression/movement_sparsity/importance_regularization_factor": 0.966853470636558, "compression/movement_sparsity/importance_threshold": -0.00023214950185133573, "compression/movement_sparsity/linear_layer_sparsity": 0.9022712987574445, "compression/movement_sparsity/model_sparsity": 0.8712755039339469, "compression_loss": 102.68152618408203, "distillation_loss": 4.286721229553223, "epoch": 4.04, "learning_rate": 3.312670235747159e-05, "loss": 106.8369, "step": 4776, "task_loss": 2.066082000732422 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9669405455377766, "compression/movement_sparsity/importance_threshold": -0.00023153965233375842, "compression/movement_sparsity/linear_layer_sparsity": 0.902301538446569, "compression/movement_sparsity/model_sparsity": 0.8713047047967218, "compression_loss": 102.69014739990234, "distillation_loss": 5.230113983154297, "epoch": 4.04, "learning_rate": 3.312200619892928e-05, "loss": 106.7509, "step": 4777, "task_loss": 3.0286102294921875 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9670274678099349, "compression/movement_sparsity/importance_threshold": -0.00023093087178964553, "compression/movement_sparsity/linear_layer_sparsity": 0.9023404947022354, "compression/movement_sparsity/model_sparsity": 0.8713423227851627, "compression_loss": 102.69873809814453, "distillation_loss": 4.976161956787109, "epoch": 4.04, "learning_rate": 3.311731004038697e-05, "loss": 106.9686, "step": 4778, "task_loss": 2.811161994934082 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9671142375869181, "compression/movement_sparsity/importance_threshold": -0.00023032315928130011, "compression/movement_sparsity/linear_layer_sparsity": 0.9023590129345739, "compression/movement_sparsity/model_sparsity": 0.8713602048592516, "compression_loss": 102.70729064941406, "distillation_loss": 5.952496528625488, "epoch": 4.04, "learning_rate": 3.3112613881844654e-05, "loss": 107.5295, "step": 4779, "task_loss": 2.136298656463623 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9672008550026112, "compression/movement_sparsity/importance_threshold": -0.00022971651387102693, "compression/movement_sparsity/linear_layer_sparsity": 0.9023332209599775, "compression/movement_sparsity/model_sparsity": 0.871335298918328, "compression_loss": 102.71589660644531, "distillation_loss": 2.958289384841919, "epoch": 4.04, "learning_rate": 3.310791772330234e-05, "loss": 106.7317, "step": 4780, "task_loss": 1.438698649406433 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9672873201908995, "compression/movement_sparsity/importance_threshold": -0.00022911093462113074, "compression/movement_sparsity/linear_layer_sparsity": 0.902385925780928, "compression/movement_sparsity/model_sparsity": 0.8713861931665399, "compression_loss": 102.72441864013672, "distillation_loss": 4.869726657867432, "epoch": 4.04, "learning_rate": 3.3103221564760027e-05, "loss": 107.1402, "step": 4781, "task_loss": 3.061699390411377 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9673736332856679, "compression/movement_sparsity/importance_threshold": -0.0002285064205939137, "compression/movement_sparsity/linear_layer_sparsity": 0.902467499011725, "compression/movement_sparsity/model_sparsity": 0.8714649641059102, "compression_loss": 102.7329330444336, "distillation_loss": 5.688161849975586, "epoch": 4.04, "learning_rate": 3.309852540621772e-05, "loss": 107.5958, "step": 4782, "task_loss": 2.566164493560791 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9674597944208019, "compression/movement_sparsity/importance_threshold": -0.0002279029708516806, "compression/movement_sparsity/linear_layer_sparsity": 0.9025167577482287, "compression/movement_sparsity/model_sparsity": 0.8715125306532775, "compression_loss": 102.74149322509766, "distillation_loss": 4.158825874328613, "epoch": 4.04, "learning_rate": 3.30938292476754e-05, "loss": 106.5877, "step": 4783, "task_loss": 2.038264274597168 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9675458037301863, "compression/movement_sparsity/importance_threshold": -0.0002273005844567362, "compression/movement_sparsity/linear_layer_sparsity": 0.9025653845038478, "compression/movement_sparsity/model_sparsity": 0.8715594869302477, "compression_loss": 102.74993896484375, "distillation_loss": 3.600076198577881, "epoch": 4.04, "learning_rate": 3.308913308913309e-05, "loss": 107.1283, "step": 4784, "task_loss": 2.175241470336914 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9676316613477064, "compression/movement_sparsity/importance_threshold": -0.00022669926047138352, "compression/movement_sparsity/linear_layer_sparsity": 0.9026348427803267, "compression/movement_sparsity/model_sparsity": 0.8716265591012509, "compression_loss": 102.75843811035156, "distillation_loss": 5.354538917541504, "epoch": 4.04, "learning_rate": 3.308443693059078e-05, "loss": 107.4868, "step": 4785, "task_loss": 3.5572593212127686 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9677173674072472, "compression/movement_sparsity/importance_threshold": -0.00022609899795792733, "compression/movement_sparsity/linear_layer_sparsity": 0.9026958587461194, "compression/movement_sparsity/model_sparsity": 0.8716854789809115, "compression_loss": 102.76697540283203, "distillation_loss": 4.784909725189209, "epoch": 4.05, "learning_rate": 3.3079740772048465e-05, "loss": 107.4464, "step": 4786, "task_loss": 3.323976516723633 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9678029220426941, "compression/movement_sparsity/importance_threshold": -0.00022549979597867066, "compression/movement_sparsity/linear_layer_sparsity": 0.9026935096850951, "compression/movement_sparsity/model_sparsity": 0.87168321061736, "compression_loss": 102.7754898071289, "distillation_loss": 5.045825958251953, "epoch": 4.05, "learning_rate": 3.307504461350615e-05, "loss": 107.4556, "step": 4787, "task_loss": 2.6036953926086426 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9678883253879321, "compression/movement_sparsity/importance_threshold": -0.0002249016535959174, "compression/movement_sparsity/linear_layer_sparsity": 0.9026732743726171, "compression/movement_sparsity/model_sparsity": 0.8716636704501167, "compression_loss": 102.78396606445312, "distillation_loss": 4.173159122467041, "epoch": 4.05, "learning_rate": 3.307034845496384e-05, "loss": 106.6053, "step": 4788, "task_loss": 2.4369330406188965 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9679735775768464, "compression/movement_sparsity/importance_threshold": -0.00022430456987197235, "compression/movement_sparsity/linear_layer_sparsity": 0.9027439608383625, "compression/movement_sparsity/model_sparsity": 0.8717319286183067, "compression_loss": 102.79241180419922, "distillation_loss": 5.7845306396484375, "epoch": 4.05, "learning_rate": 3.306565229642153e-05, "loss": 107.5709, "step": 4789, "task_loss": 3.185696601867676 }, { "compression/movement_sparsity/importance_regularization_factor": 0.968058678743322, "compression/movement_sparsity/importance_threshold": -0.00022370854386913937, "compression/movement_sparsity/linear_layer_sparsity": 0.9028109150396378, "compression/movement_sparsity/model_sparsity": 0.8717965827367931, "compression_loss": 102.80086517333984, "distillation_loss": 4.4043989181518555, "epoch": 4.05, "learning_rate": 3.306095613787922e-05, "loss": 107.7359, "step": 4790, "task_loss": 2.699735164642334 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9681436290212441, "compression/movement_sparsity/importance_threshold": -0.00022311357464972238, "compression/movement_sparsity/linear_layer_sparsity": 0.9028552371707403, "compression/movement_sparsity/model_sparsity": 0.8718393822663414, "compression_loss": 102.80928039550781, "distillation_loss": 4.339084625244141, "epoch": 4.05, "learning_rate": 3.3056259979336904e-05, "loss": 106.6341, "step": 4791, "task_loss": 2.271944761276245 }, { "compression/movement_sparsity/importance_regularization_factor": 0.968228428544498, "compression/movement_sparsity/importance_threshold": -0.0002225196612760244, "compression/movement_sparsity/linear_layer_sparsity": 0.902963150887845, "compression/movement_sparsity/model_sparsity": 0.871943588815282, "compression_loss": 102.81771850585938, "distillation_loss": 4.447133541107178, "epoch": 4.05, "learning_rate": 3.305156382079459e-05, "loss": 107.3625, "step": 4792, "task_loss": 2.0206549167633057 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9683130774469686, "compression/movement_sparsity/importance_threshold": -0.00022192680281035108, "compression/movement_sparsity/linear_layer_sparsity": 0.9030550742961497, "compression/movement_sparsity/model_sparsity": 0.8720323543717221, "compression_loss": 102.82608795166016, "distillation_loss": 3.582505941390991, "epoch": 4.05, "learning_rate": 3.3046867662252276e-05, "loss": 107.0794, "step": 4793, "task_loss": 2.677927255630493 }, { "compression/movement_sparsity/importance_regularization_factor": 0.968397575862541, "compression/movement_sparsity/importance_threshold": -0.00022133499831500543, "compression/movement_sparsity/linear_layer_sparsity": 0.9030402167832755, "compression/movement_sparsity/model_sparsity": 0.8720180072601221, "compression_loss": 102.83450317382812, "distillation_loss": 4.180645942687988, "epoch": 4.05, "learning_rate": 3.304217150370997e-05, "loss": 107.4208, "step": 4794, "task_loss": 2.0192930698394775 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9684819239251007, "compression/movement_sparsity/importance_threshold": -0.00022074424685229223, "compression/movement_sparsity/linear_layer_sparsity": 0.9031129661300219, "compression/movement_sparsity/model_sparsity": 0.8720882574430046, "compression_loss": 102.84291076660156, "distillation_loss": 4.118429183959961, "epoch": 4.05, "learning_rate": 3.3037475345167656e-05, "loss": 106.823, "step": 4795, "task_loss": 1.6211737394332886 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9685661217685325, "compression/movement_sparsity/importance_threshold": -0.00022015454748451364, "compression/movement_sparsity/linear_layer_sparsity": 0.9030674038854852, "compression/movement_sparsity/model_sparsity": 0.8720442604017337, "compression_loss": 102.85122680664062, "distillation_loss": 4.612369537353516, "epoch": 4.05, "learning_rate": 3.303277918662534e-05, "loss": 106.4381, "step": 4796, "task_loss": 3.711895227432251 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9686501695267218, "compression/movement_sparsity/importance_threshold": -0.0002195658992739753, "compression/movement_sparsity/linear_layer_sparsity": 0.9031891019403768, "compression/movement_sparsity/model_sparsity": 0.8721617777540527, "compression_loss": 102.85961151123047, "distillation_loss": 4.436980724334717, "epoch": 4.05, "learning_rate": 3.302808302808303e-05, "loss": 107.3193, "step": 4797, "task_loss": 2.0329346656799316 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9687340673335535, "compression/movement_sparsity/importance_threshold": -0.00021897830128298022, "compression/movement_sparsity/linear_layer_sparsity": 0.9032424148938768, "compression/movement_sparsity/model_sparsity": 0.8722132592435901, "compression_loss": 102.86788177490234, "distillation_loss": 4.790655136108398, "epoch": 4.06, "learning_rate": 3.3023386869540715e-05, "loss": 106.6703, "step": 4798, "task_loss": 1.9334609508514404 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9688178153229129, "compression/movement_sparsity/importance_threshold": -0.00021839175257383318, "compression/movement_sparsity/linear_layer_sparsity": 0.9032975045483546, "compression/movement_sparsity/model_sparsity": 0.8722664563989608, "compression_loss": 102.87620544433594, "distillation_loss": 3.4879918098449707, "epoch": 4.06, "learning_rate": 3.301869071099841e-05, "loss": 107.1401, "step": 4799, "task_loss": 2.313281536102295 }, { "compression/movement_sparsity/importance_regularization_factor": 0.968901413628685, "compression/movement_sparsity/importance_threshold": -0.00021780625220883896, "compression/movement_sparsity/linear_layer_sparsity": 0.9032866654799735, "compression/movement_sparsity/model_sparsity": 0.8722559896859237, "compression_loss": 102.88446044921875, "distillation_loss": 4.432814121246338, "epoch": 4.06, "learning_rate": 3.301399455245609e-05, "loss": 106.9459, "step": 4800, "task_loss": 2.8138625621795654 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9689848623847551, "compression/movement_sparsity/importance_threshold": -0.00021722179925029884, "compression/movement_sparsity/linear_layer_sparsity": 0.9033663308439487, "compression/movement_sparsity/model_sparsity": 0.8723329182995669, "compression_loss": 102.89271545410156, "distillation_loss": 5.30959939956665, "epoch": 4.06, "learning_rate": 3.300929839391378e-05, "loss": 107.7164, "step": 4801, "task_loss": 3.613698720932007 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9690681617250083, "compression/movement_sparsity/importance_threshold": -0.00021663839276051845, "compression/movement_sparsity/linear_layer_sparsity": 0.9033493269809, "compression/movement_sparsity/model_sparsity": 0.8723164985715239, "compression_loss": 102.90096282958984, "distillation_loss": 3.8767526149749756, "epoch": 4.06, "learning_rate": 3.300460223537147e-05, "loss": 107.8536, "step": 4802, "task_loss": 1.7695902585983276 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9691513117833296, "compression/movement_sparsity/importance_threshold": -0.00021605603180180257, "compression/movement_sparsity/linear_layer_sparsity": 0.9033686679808053, "compression/movement_sparsity/model_sparsity": 0.8723351751485827, "compression_loss": 102.90921783447266, "distillation_loss": 3.9128403663635254, "epoch": 4.06, "learning_rate": 3.299990607682916e-05, "loss": 107.1655, "step": 4803, "task_loss": 2.1121826171875 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9692343126936044, "compression/movement_sparsity/importance_threshold": -0.00021547471543645336, "compression/movement_sparsity/linear_layer_sparsity": 0.9034260351513015, "compression/movement_sparsity/model_sparsity": 0.8723905715802902, "compression_loss": 102.91742706298828, "distillation_loss": 5.042122840881348, "epoch": 4.06, "learning_rate": 3.2995209918286846e-05, "loss": 107.3597, "step": 4804, "task_loss": 2.3340232372283936 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9693171645897176, "compression/movement_sparsity/importance_threshold": -0.00021489444272677558, "compression/movement_sparsity/linear_layer_sparsity": 0.9034353598503927, "compression/movement_sparsity/model_sparsity": 0.8723995759472816, "compression_loss": 102.92564392089844, "distillation_loss": 5.148585796356201, "epoch": 4.06, "learning_rate": 3.2990513759744526e-05, "loss": 107.3898, "step": 4805, "task_loss": 2.1772398948669434 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9693998676055544, "compression/movement_sparsity/importance_threshold": -0.00021431521273507486, "compression/movement_sparsity/linear_layer_sparsity": 0.9034389728731864, "compression/movement_sparsity/model_sparsity": 0.8724030648516273, "compression_loss": 102.93388366699219, "distillation_loss": 3.664132595062256, "epoch": 4.06, "learning_rate": 3.298581760120222e-05, "loss": 107.2649, "step": 4806, "task_loss": 2.480259656906128 }, { "compression/movement_sparsity/importance_regularization_factor": 0.969482421875, "compression/movement_sparsity/importance_threshold": -0.0002137370245236525, "compression/movement_sparsity/linear_layer_sparsity": 0.90349721050792, "compression/movement_sparsity/model_sparsity": 0.8724593018444479, "compression_loss": 102.94212341308594, "distillation_loss": 4.195376873016357, "epoch": 4.06, "learning_rate": 3.2981121442659905e-05, "loss": 106.6227, "step": 4807, "task_loss": 1.8583990335464478 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9695648275319395, "compression/movement_sparsity/importance_threshold": -0.00021315987715481415, "compression/movement_sparsity/linear_layer_sparsity": 0.9034363376321388, "compression/movement_sparsity/model_sparsity": 0.8724005201392168, "compression_loss": 102.9503173828125, "distillation_loss": 4.824770927429199, "epoch": 4.06, "learning_rate": 3.29764252841176e-05, "loss": 107.5201, "step": 4808, "task_loss": 2.253376007080078 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9696470847102581, "compression/movement_sparsity/importance_threshold": -0.00021258376969086282, "compression/movement_sparsity/linear_layer_sparsity": 0.9034531030118349, "compression/movement_sparsity/model_sparsity": 0.8724167095765438, "compression_loss": 102.95848083496094, "distillation_loss": 4.4920525550842285, "epoch": 4.07, "learning_rate": 3.297172912557528e-05, "loss": 107.4236, "step": 4809, "task_loss": 3.5478129386901855 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9697291935438409, "compression/movement_sparsity/importance_threshold": -0.0002120087011941033, "compression/movement_sparsity/linear_layer_sparsity": 0.9035288930213284, "compression/movement_sparsity/model_sparsity": 0.872489895966054, "compression_loss": 102.96662902832031, "distillation_loss": 4.278686046600342, "epoch": 4.07, "learning_rate": 3.296703296703297e-05, "loss": 107.5704, "step": 4810, "task_loss": 2.250063419342041 }, { "compression/movement_sparsity/importance_regularization_factor": 0.969811154166573, "compression/movement_sparsity/importance_threshold": -0.00021143467072683858, "compression/movement_sparsity/linear_layer_sparsity": 0.9036065312768056, "compression/movement_sparsity/model_sparsity": 0.8725648671086123, "compression_loss": 102.97474670410156, "distillation_loss": 4.549725532531738, "epoch": 4.07, "learning_rate": 3.296233680849066e-05, "loss": 106.9804, "step": 4811, "task_loss": 2.4518086910247803 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9698929667123396, "compression/movement_sparsity/importance_threshold": -0.00021086167735137346, "compression/movement_sparsity/linear_layer_sparsity": 0.9036753217998968, "compression/movement_sparsity/model_sparsity": 0.8726312944656109, "compression_loss": 102.98284912109375, "distillation_loss": 4.317776679992676, "epoch": 4.07, "learning_rate": 3.2957640649948344e-05, "loss": 107.0914, "step": 4812, "task_loss": 2.2881648540496826 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9699746313150257, "compression/movement_sparsity/importance_threshold": -0.0002102897201300127, "compression/movement_sparsity/linear_layer_sparsity": 0.9036953663256927, "compression/movement_sparsity/model_sparsity": 0.8726506504002816, "compression_loss": 102.9909439086914, "distillation_loss": 6.259421348571777, "epoch": 4.07, "learning_rate": 3.295294449140603e-05, "loss": 107.7264, "step": 4813, "task_loss": 3.3878540992736816 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9700561481085167, "compression/movement_sparsity/importance_threshold": -0.00020971879812505846, "compression/movement_sparsity/linear_layer_sparsity": 0.9037732907611931, "compression/movement_sparsity/model_sparsity": 0.8727258978916989, "compression_loss": 102.99905395507812, "distillation_loss": 3.979527473449707, "epoch": 4.07, "learning_rate": 3.2948248332863716e-05, "loss": 107.2432, "step": 4814, "task_loss": 1.8918050527572632 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9701375172266976, "compression/movement_sparsity/importance_threshold": -0.0002091489103988155, "compression/movement_sparsity/linear_layer_sparsity": 0.903816945138908, "compression/movement_sparsity/model_sparsity": 0.8727680526072427, "compression_loss": 103.00713348388672, "distillation_loss": 4.389825344085693, "epoch": 4.07, "learning_rate": 3.294355217432141e-05, "loss": 106.7897, "step": 4815, "task_loss": 1.8579771518707275 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9702187388034534, "compression/movement_sparsity/importance_threshold": -0.0002085800560135886, "compression/movement_sparsity/linear_layer_sparsity": 0.9038467555579976, "compression/movement_sparsity/model_sparsity": 0.872796838946729, "compression_loss": 103.01522064208984, "distillation_loss": 4.505827903747559, "epoch": 4.07, "learning_rate": 3.2938856015779096e-05, "loss": 107.0756, "step": 4816, "task_loss": 1.6593960523605347 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9702998129726695, "compression/movement_sparsity/importance_threshold": -0.0002080122340316808, "compression/movement_sparsity/linear_layer_sparsity": 0.9039200057197847, "compression/movement_sparsity/model_sparsity": 0.8728675727401148, "compression_loss": 103.02324676513672, "distillation_loss": 4.635760307312012, "epoch": 4.07, "learning_rate": 3.293415985723678e-05, "loss": 107.7039, "step": 4817, "task_loss": 2.437514305114746 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9703807398682309, "compression/movement_sparsity/importance_threshold": -0.00020744544351539596, "compression/movement_sparsity/linear_layer_sparsity": 0.9039227244300057, "compression/movement_sparsity/model_sparsity": 0.872870198054276, "compression_loss": 103.03123474121094, "distillation_loss": 3.671464681625366, "epoch": 4.07, "learning_rate": 3.292946369869447e-05, "loss": 107.456, "step": 4818, "task_loss": 1.8804922103881836 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9704615196240227, "compression/movement_sparsity/importance_threshold": -0.00020687968352703888, "compression/movement_sparsity/linear_layer_sparsity": 0.903940038321413, "compression/movement_sparsity/model_sparsity": 0.8728869171602497, "compression_loss": 103.03926086425781, "distillation_loss": 2.5917115211486816, "epoch": 4.07, "learning_rate": 3.2924767540152155e-05, "loss": 106.2955, "step": 4819, "task_loss": 2.0098533630371094 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9705421523739303, "compression/movement_sparsity/importance_threshold": -0.0002063149531289117, "compression/movement_sparsity/linear_layer_sparsity": 0.9039983832736552, "compression/movement_sparsity/model_sparsity": 0.8729432577838924, "compression_loss": 103.04725646972656, "distillation_loss": 3.6401312351226807, "epoch": 4.07, "learning_rate": 3.292007138160985e-05, "loss": 106.5979, "step": 4820, "task_loss": 2.5357186794281006 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9706226382518384, "compression/movement_sparsity/importance_threshold": -0.00020575125138332182, "compression/movement_sparsity/linear_layer_sparsity": 0.9040809223620307, "compression/movement_sparsity/model_sparsity": 0.8730229614006623, "compression_loss": 103.05523681640625, "distillation_loss": 4.618346214294434, "epoch": 4.08, "learning_rate": 3.2915375223067534e-05, "loss": 108.0826, "step": 4821, "task_loss": 2.9333016872406006 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9707029773916325, "compression/movement_sparsity/importance_threshold": -0.00020518857735256964, "compression/movement_sparsity/linear_layer_sparsity": 0.9042031093077953, "compression/movement_sparsity/model_sparsity": 0.8731409508489488, "compression_loss": 103.06315612792969, "distillation_loss": 3.853951930999756, "epoch": 4.08, "learning_rate": 3.291067906452522e-05, "loss": 107.264, "step": 4822, "task_loss": 2.2856199741363525 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9707831699271976, "compression/movement_sparsity/importance_threshold": -0.0002046269300989608, "compression/movement_sparsity/linear_layer_sparsity": 0.9042424590609938, "compression/movement_sparsity/model_sparsity": 0.8731789488170708, "compression_loss": 103.07108306884766, "distillation_loss": 3.9655282497406006, "epoch": 4.08, "learning_rate": 3.290598290598291e-05, "loss": 107.113, "step": 4823, "task_loss": 2.3247575759887695 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9708632159924188, "compression/movement_sparsity/importance_threshold": -0.0002040663086848001, "compression/movement_sparsity/linear_layer_sparsity": 0.9042813914683249, "compression/movement_sparsity/model_sparsity": 0.8732165437764401, "compression_loss": 103.07894897460938, "distillation_loss": 5.123559951782227, "epoch": 4.08, "learning_rate": 3.290128674744059e-05, "loss": 107.5273, "step": 4824, "task_loss": 2.219270706176758 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9709431157211814, "compression/movement_sparsity/importance_threshold": -0.00020350671217238964, "compression/movement_sparsity/linear_layer_sparsity": 0.9042884744239005, "compression/movement_sparsity/model_sparsity": 0.873223383410702, "compression_loss": 103.08683776855469, "distillation_loss": 5.56449031829834, "epoch": 4.08, "learning_rate": 3.2896590588898286e-05, "loss": 107.6456, "step": 4825, "task_loss": 3.9072721004486084 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9710228692473704, "compression/movement_sparsity/importance_threshold": -0.0002029481396240351, "compression/movement_sparsity/linear_layer_sparsity": 0.9043625712015898, "compression/movement_sparsity/model_sparsity": 0.8732949347361293, "compression_loss": 103.0947265625, "distillation_loss": 6.15261173248291, "epoch": 4.08, "learning_rate": 3.2891894430355966e-05, "loss": 107.5957, "step": 4826, "task_loss": 2.930346965789795 }, { "compression/movement_sparsity/importance_regularization_factor": 0.971102476704871, "compression/movement_sparsity/importance_threshold": -0.00020239059010203776, "compression/movement_sparsity/linear_layer_sparsity": 0.9044721185296605, "compression/movement_sparsity/model_sparsity": 0.8734007187764737, "compression_loss": 103.10258483886719, "distillation_loss": 4.318051815032959, "epoch": 4.08, "learning_rate": 3.288719827181366e-05, "loss": 107.6706, "step": 4827, "task_loss": 1.484450340270996 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9711819382275683, "compression/movement_sparsity/importance_threshold": -0.00020183406266870586, "compression/movement_sparsity/linear_layer_sparsity": 0.9045737601345886, "compression/movement_sparsity/model_sparsity": 0.8734988686795864, "compression_loss": 103.11041259765625, "distillation_loss": 3.511653423309326, "epoch": 4.08, "learning_rate": 3.2882502113271346e-05, "loss": 107.3884, "step": 4828, "task_loss": 2.230194330215454 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9712612539493475, "compression/movement_sparsity/importance_threshold": -0.00020127855638633983, "compression/movement_sparsity/linear_layer_sparsity": 0.904613729944504, "compression/movement_sparsity/model_sparsity": 0.8735374654035697, "compression_loss": 103.11822509765625, "distillation_loss": 3.714754581451416, "epoch": 4.08, "learning_rate": 3.287780595472903e-05, "loss": 107.136, "step": 4829, "task_loss": 2.2605957984924316 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9713404240040936, "compression/movement_sparsity/importance_threshold": -0.0002007240703172453, "compression/movement_sparsity/linear_layer_sparsity": 0.9046947427394221, "compression/movement_sparsity/model_sparsity": 0.8736156951597578, "compression_loss": 103.12602233886719, "distillation_loss": 3.527259111404419, "epoch": 4.08, "learning_rate": 3.287310979618672e-05, "loss": 107.921, "step": 4830, "task_loss": 2.270867109298706 }, { "compression/movement_sparsity/importance_regularization_factor": 0.971419448525692, "compression/movement_sparsity/importance_threshold": -0.0002001706035237253, "compression/movement_sparsity/linear_layer_sparsity": 0.9047635332625134, "compression/movement_sparsity/model_sparsity": 0.8736821225167565, "compression_loss": 103.13387298583984, "distillation_loss": 4.302008628845215, "epoch": 4.08, "learning_rate": 3.2868413637644405e-05, "loss": 107.1231, "step": 4831, "task_loss": 2.0838630199432373 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9714983276480276, "compression/movement_sparsity/importance_threshold": -0.0001996181550680846, "compression/movement_sparsity/linear_layer_sparsity": 0.9048708149987332, "compression/movement_sparsity/model_sparsity": 0.8737857187953, "compression_loss": 103.14170837402344, "distillation_loss": 4.88677978515625, "epoch": 4.08, "learning_rate": 3.28637174791021e-05, "loss": 107.0759, "step": 4832, "task_loss": 2.2611024379730225 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9715770615049857, "compression/movement_sparsity/importance_threshold": -0.00019906672401262624, "compression/movement_sparsity/linear_layer_sparsity": 0.9048633623939608, "compression/movement_sparsity/model_sparsity": 0.8737785222104284, "compression_loss": 103.14945983886719, "distillation_loss": 4.021677017211914, "epoch": 4.09, "learning_rate": 3.2859021320559784e-05, "loss": 107.8425, "step": 4833, "task_loss": 2.316464900970459 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9716556502304512, "compression/movement_sparsity/importance_threshold": -0.0001985163094196567, "compression/movement_sparsity/linear_layer_sparsity": 0.9049988805591423, "compression/movement_sparsity/model_sparsity": 0.8739093849097334, "compression_loss": 103.15727996826172, "distillation_loss": 5.544647216796875, "epoch": 4.09, "learning_rate": 3.285432516201747e-05, "loss": 107.9736, "step": 4834, "task_loss": 2.5688581466674805 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9717340939583096, "compression/movement_sparsity/importance_threshold": -0.00019796691035147643, "compression/movement_sparsity/linear_layer_sparsity": 0.9049906767318089, "compression/movement_sparsity/model_sparsity": 0.8739014629091068, "compression_loss": 103.1650161743164, "distillation_loss": 6.0070343017578125, "epoch": 4.09, "learning_rate": 3.284962900347516e-05, "loss": 107.7886, "step": 4835, "task_loss": 3.17558217048645 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9718123928224458, "compression/movement_sparsity/importance_threshold": -0.00019741852587039106, "compression/movement_sparsity/linear_layer_sparsity": 0.9051158804919854, "compression/movement_sparsity/model_sparsity": 0.8740223655349494, "compression_loss": 103.17269134521484, "distillation_loss": 2.9787697792053223, "epoch": 4.09, "learning_rate": 3.284493284493284e-05, "loss": 106.935, "step": 4836, "task_loss": 2.970285415649414 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9718905469567449, "compression/movement_sparsity/importance_threshold": -0.00019687115503870534, "compression/movement_sparsity/linear_layer_sparsity": 0.9051511044831818, "compression/movement_sparsity/model_sparsity": 0.8740563794736864, "compression_loss": 103.18045043945312, "distillation_loss": 4.196341514587402, "epoch": 4.09, "learning_rate": 3.2840236686390536e-05, "loss": 107.6158, "step": 4837, "task_loss": 2.448638916015625 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9719685564950922, "compression/movement_sparsity/importance_threshold": -0.0001963247969187223, "compression/movement_sparsity/linear_layer_sparsity": 0.9051163097620203, "compression/movement_sparsity/model_sparsity": 0.874022780058238, "compression_loss": 103.18806457519531, "distillation_loss": 4.77727746963501, "epoch": 4.09, "learning_rate": 3.283554052784822e-05, "loss": 107.6622, "step": 4838, "task_loss": 3.03879451751709 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9720464215713728, "compression/movement_sparsity/importance_threshold": -0.00019577945057274587, "compression/movement_sparsity/linear_layer_sparsity": 0.9050762922554344, "compression/movement_sparsity/model_sparsity": 0.8739841372761115, "compression_loss": 103.19576263427734, "distillation_loss": 5.401034355163574, "epoch": 4.09, "learning_rate": 3.283084436930591e-05, "loss": 107.5658, "step": 4839, "task_loss": 3.071499824523926 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9721241423194718, "compression/movement_sparsity/importance_threshold": -0.00019523511506307992, "compression/movement_sparsity/linear_layer_sparsity": 0.9050918294458639, "compression/movement_sparsity/model_sparsity": 0.8739991407162518, "compression_loss": 103.2033462524414, "distillation_loss": 4.5809197425842285, "epoch": 4.09, "learning_rate": 3.2826148210763595e-05, "loss": 107.6824, "step": 4840, "task_loss": 2.493220567703247 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9722017188732743, "compression/movement_sparsity/importance_threshold": -0.00019469178945202923, "compression/movement_sparsity/linear_layer_sparsity": 0.9051056972528244, "compression/movement_sparsity/model_sparsity": 0.8740125321213809, "compression_loss": 103.2109603881836, "distillation_loss": 5.460968017578125, "epoch": 4.09, "learning_rate": 3.282145205222129e-05, "loss": 107.9276, "step": 4841, "task_loss": 2.878288745880127 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9722791513666655, "compression/movement_sparsity/importance_threshold": -0.0001941494728018977, "compression/movement_sparsity/linear_layer_sparsity": 0.9051503174881178, "compression/movement_sparsity/model_sparsity": 0.874055619514324, "compression_loss": 103.21862030029297, "distillation_loss": 3.1633620262145996, "epoch": 4.09, "learning_rate": 3.2816755893678975e-05, "loss": 107.3947, "step": 4842, "task_loss": 1.761926531791687 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9723564399335306, "compression/movement_sparsity/importance_threshold": -0.00019360816417498834, "compression/movement_sparsity/linear_layer_sparsity": 0.9051570307944968, "compression/movement_sparsity/model_sparsity": 0.8740621021979763, "compression_loss": 103.22618865966797, "distillation_loss": 4.061526775360107, "epoch": 4.09, "learning_rate": 3.281205973513666e-05, "loss": 107.2181, "step": 4843, "task_loss": 2.8614678382873535 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9724335847077547, "compression/movement_sparsity/importance_threshold": -0.00019306786263360593, "compression/movement_sparsity/linear_layer_sparsity": 0.9052637520948378, "compression/movement_sparsity/model_sparsity": 0.8741651572933374, "compression_loss": 103.23377990722656, "distillation_loss": 3.236111879348755, "epoch": 4.09, "learning_rate": 3.280736357659435e-05, "loss": 107.4518, "step": 4844, "task_loss": 1.4589552879333496 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9725105858232228, "compression/movement_sparsity/importance_threshold": -0.00019252856724005438, "compression/movement_sparsity/linear_layer_sparsity": 0.9053354878873351, "compression/movement_sparsity/model_sparsity": 0.8742344287406775, "compression_loss": 103.2413330078125, "distillation_loss": 2.675389289855957, "epoch": 4.1, "learning_rate": 3.2802667418052034e-05, "loss": 106.5977, "step": 4845, "task_loss": 1.2069915533065796 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9725874434138202, "compression/movement_sparsity/importance_threshold": -0.00019199027705663757, "compression/movement_sparsity/linear_layer_sparsity": 0.9053688159358774, "compression/movement_sparsity/model_sparsity": 0.8742666118682232, "compression_loss": 103.24886322021484, "distillation_loss": 3.2741708755493164, "epoch": 4.1, "learning_rate": 3.279797125950973e-05, "loss": 107.7795, "step": 4846, "task_loss": 3.3193116188049316 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9726641576134321, "compression/movement_sparsity/importance_threshold": -0.00019145299114565854, "compression/movement_sparsity/linear_layer_sparsity": 0.905392700043652, "compression/movement_sparsity/model_sparsity": 0.8742896754834196, "compression_loss": 103.25633239746094, "distillation_loss": 6.7563862800598145, "epoch": 4.1, "learning_rate": 3.2793275100967406e-05, "loss": 108.3913, "step": 4847, "task_loss": 4.259468078613281 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9727407285559434, "compression/movement_sparsity/importance_threshold": -0.00019091670856942293, "compression/movement_sparsity/linear_layer_sparsity": 0.9054963210604077, "compression/movement_sparsity/model_sparsity": 0.8743897367994742, "compression_loss": 103.26383972167969, "distillation_loss": 5.104982376098633, "epoch": 4.1, "learning_rate": 3.27885789424251e-05, "loss": 107.8564, "step": 4848, "task_loss": 2.2054660320281982 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9728171563752394, "compression/movement_sparsity/importance_threshold": -0.00019038142839023463, "compression/movement_sparsity/linear_layer_sparsity": 0.9055548448751645, "compression/movement_sparsity/model_sparsity": 0.8744462501411538, "compression_loss": 103.2713623046875, "distillation_loss": 5.091462135314941, "epoch": 4.1, "learning_rate": 3.2783882783882786e-05, "loss": 107.9965, "step": 4849, "task_loss": 2.056246280670166 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9728934412052053, "compression/movement_sparsity/importance_threshold": -0.0001898471496703958, "compression/movement_sparsity/linear_layer_sparsity": 0.9055949220025886, "compression/movement_sparsity/model_sparsity": 0.8744849504959593, "compression_loss": 103.27880096435547, "distillation_loss": 3.1431407928466797, "epoch": 4.1, "learning_rate": 3.277918662534047e-05, "loss": 107.3283, "step": 4850, "task_loss": 2.1383557319641113 }, { "compression/movement_sparsity/importance_regularization_factor": 0.972969583179726, "compression/movement_sparsity/importance_threshold": -0.00018931387147221208, "compression/movement_sparsity/linear_layer_sparsity": 0.9056764356125474, "compression/movement_sparsity/model_sparsity": 0.8745636638626507, "compression_loss": 103.28632354736328, "distillation_loss": 3.8292155265808105, "epoch": 4.1, "learning_rate": 3.2774490466798165e-05, "loss": 107.2169, "step": 4851, "task_loss": 1.37556791305542 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9730455824326869, "compression/movement_sparsity/importance_threshold": -0.00018878159285798737, "compression/movement_sparsity/linear_layer_sparsity": 0.9057316206603662, "compression/movement_sparsity/model_sparsity": 0.8746169531343079, "compression_loss": 103.2937240600586, "distillation_loss": 3.828979969024658, "epoch": 4.1, "learning_rate": 3.2769794308255845e-05, "loss": 107.5758, "step": 4852, "task_loss": 2.2090728282928467 }, { "compression/movement_sparsity/importance_regularization_factor": 0.973121439097973, "compression/movement_sparsity/importance_threshold": -0.0001882503128900247, "compression/movement_sparsity/linear_layer_sparsity": 0.9056524918839345, "compression/movement_sparsity/model_sparsity": 0.8745405426747753, "compression_loss": 103.30120849609375, "distillation_loss": 4.179638862609863, "epoch": 4.1, "learning_rate": 3.276509814971354e-05, "loss": 108.3649, "step": 4853, "task_loss": 2.1499128341674805 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9731971533094697, "compression/movement_sparsity/importance_threshold": -0.00018772003063062797, "compression/movement_sparsity/linear_layer_sparsity": 0.9057002004786456, "compression/movement_sparsity/model_sparsity": 0.8745866123324892, "compression_loss": 103.30866241455078, "distillation_loss": 5.240311622619629, "epoch": 4.1, "learning_rate": 3.2760401991171224e-05, "loss": 107.6471, "step": 4854, "task_loss": 2.432844877243042 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9732727252010617, "compression/movement_sparsity/importance_threshold": -0.0001871907451421028, "compression/movement_sparsity/linear_layer_sparsity": 0.9057916111477422, "compression/movement_sparsity/model_sparsity": 0.8746748827638903, "compression_loss": 103.31600189208984, "distillation_loss": 5.067829608917236, "epoch": 4.1, "learning_rate": 3.275570583262891e-05, "loss": 107.9471, "step": 4855, "task_loss": 2.5357766151428223 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9733481549066343, "compression/movement_sparsity/importance_threshold": -0.00018666245548675225, "compression/movement_sparsity/linear_layer_sparsity": 0.9057904664276492, "compression/movement_sparsity/model_sparsity": 0.874673777368454, "compression_loss": 103.32350158691406, "distillation_loss": 6.4371843338012695, "epoch": 4.1, "learning_rate": 3.27510096740866e-05, "loss": 107.5209, "step": 4856, "task_loss": 3.6523308753967285 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9734234425600728, "compression/movement_sparsity/importance_threshold": -0.00018613516072687932, "compression/movement_sparsity/linear_layer_sparsity": 0.9058622856893199, "compression/movement_sparsity/model_sparsity": 0.8747431294175445, "compression_loss": 103.33088684082031, "distillation_loss": 4.006731986999512, "epoch": 4.11, "learning_rate": 3.274631351554428e-05, "loss": 107.9295, "step": 4857, "task_loss": 2.3039591312408447 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9734985882952623, "compression/movement_sparsity/importance_threshold": -0.00018560885992478966, "compression/movement_sparsity/linear_layer_sparsity": 0.9059462914503146, "compression/movement_sparsity/model_sparsity": 0.874824249322217, "compression_loss": 103.3382339477539, "distillation_loss": 5.863892555236816, "epoch": 4.11, "learning_rate": 3.2741617357001976e-05, "loss": 108.671, "step": 4858, "task_loss": 3.5703442096710205 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9735735922460879, "compression/movement_sparsity/importance_threshold": -0.00018508355214278543, "compression/movement_sparsity/linear_layer_sparsity": 0.9059895761788328, "compression/movement_sparsity/model_sparsity": 0.8748660470871512, "compression_loss": 103.34549713134766, "distillation_loss": 4.333067893981934, "epoch": 4.11, "learning_rate": 3.273692119845966e-05, "loss": 107.627, "step": 4859, "task_loss": 2.58748197555542 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9736484545464347, "compression/movement_sparsity/importance_threshold": -0.0001845592364431714, "compression/movement_sparsity/linear_layer_sparsity": 0.9059760780210689, "compression/movement_sparsity/model_sparsity": 0.8748530126326317, "compression_loss": 103.35281372070312, "distillation_loss": 5.874433517456055, "epoch": 4.11, "learning_rate": 3.273222503991735e-05, "loss": 107.8407, "step": 4860, "task_loss": 2.864546298980713 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9737231753301877, "compression/movement_sparsity/importance_threshold": -0.0001840359118882532, "compression/movement_sparsity/linear_layer_sparsity": 0.9060997435636204, "compression/movement_sparsity/model_sparsity": 0.874972429883357, "compression_loss": 103.36009216308594, "distillation_loss": 5.162848472595215, "epoch": 4.11, "learning_rate": 3.2727528881375035e-05, "loss": 107.5571, "step": 4861, "task_loss": 3.739835739135742 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9737977547312324, "compression/movement_sparsity/importance_threshold": -0.00018351357754033213, "compression/movement_sparsity/linear_layer_sparsity": 0.906137090056656, "compression/movement_sparsity/model_sparsity": 0.8750084934094654, "compression_loss": 103.36736297607422, "distillation_loss": 5.618568420410156, "epoch": 4.11, "learning_rate": 3.272283272283272e-05, "loss": 107.5981, "step": 4862, "task_loss": 3.8957080841064453 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9738721928834537, "compression/movement_sparsity/importance_threshold": -0.00018299223246171382, "compression/movement_sparsity/linear_layer_sparsity": 0.9061086270685093, "compression/movement_sparsity/model_sparsity": 0.8749810082125239, "compression_loss": 103.37456512451172, "distillation_loss": 3.5987634658813477, "epoch": 4.11, "learning_rate": 3.2718136564290415e-05, "loss": 107.6706, "step": 4863, "task_loss": 3.310443878173828 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9739464899207367, "compression/movement_sparsity/importance_threshold": -0.00018247187571470216, "compression/movement_sparsity/linear_layer_sparsity": 0.9061518879486922, "compression/movement_sparsity/model_sparsity": 0.8750227829483864, "compression_loss": 103.38175201416016, "distillation_loss": 3.966580867767334, "epoch": 4.11, "learning_rate": 3.2713440405748094e-05, "loss": 107.1039, "step": 4864, "task_loss": 3.165783166885376 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9740206459769667, "compression/movement_sparsity/importance_threshold": -0.00018195250636160107, "compression/movement_sparsity/linear_layer_sparsity": 0.9060907885137259, "compression/movement_sparsity/model_sparsity": 0.8749637824669753, "compression_loss": 103.38908386230469, "distillation_loss": 5.530395030975342, "epoch": 4.11, "learning_rate": 3.270874424720579e-05, "loss": 108.2447, "step": 4865, "task_loss": 3.1670470237731934 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9740946611860288, "compression/movement_sparsity/importance_threshold": -0.00018143412346471356, "compression/movement_sparsity/linear_layer_sparsity": 0.9061336439722093, "compression/movement_sparsity/model_sparsity": 0.8750051657086209, "compression_loss": 103.39628601074219, "distillation_loss": 4.051733016967773, "epoch": 4.11, "learning_rate": 3.2704048088663474e-05, "loss": 107.5284, "step": 4866, "task_loss": 1.704953908920288 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9741685356818082, "compression/movement_sparsity/importance_threshold": -0.00018091672608634354, "compression/movement_sparsity/linear_layer_sparsity": 0.9061666739165606, "compression/movement_sparsity/model_sparsity": 0.8750370609727717, "compression_loss": 103.40347290039062, "distillation_loss": 4.476462364196777, "epoch": 4.11, "learning_rate": 3.269935193012117e-05, "loss": 107.9956, "step": 4867, "task_loss": 2.446237087249756 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9742422695981897, "compression/movement_sparsity/importance_threshold": -0.0001804003132887975, "compression/movement_sparsity/linear_layer_sparsity": 0.906196329321471, "compression/movement_sparsity/model_sparsity": 0.8750656976232928, "compression_loss": 103.41069793701172, "distillation_loss": 5.960323810577393, "epoch": 4.11, "learning_rate": 3.269465577157885e-05, "loss": 108.2353, "step": 4868, "task_loss": 3.4745163917541504 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9743158630690588, "compression/movement_sparsity/importance_threshold": -0.00017988488413437677, "compression/movement_sparsity/linear_layer_sparsity": 0.906242368532713, "compression/movement_sparsity/model_sparsity": 0.8751101552459954, "compression_loss": 103.41789245605469, "distillation_loss": 4.863421440124512, "epoch": 4.12, "learning_rate": 3.268995961303653e-05, "loss": 108.0017, "step": 4869, "task_loss": 1.4490094184875488 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9743893162283006, "compression/movement_sparsity/importance_threshold": -0.00017937043768538607, "compression/movement_sparsity/linear_layer_sparsity": 0.9062592173815825, "compression/movement_sparsity/model_sparsity": 0.8751264252850731, "compression_loss": 103.42511749267578, "distillation_loss": 3.9651474952697754, "epoch": 4.12, "learning_rate": 3.2685263454494226e-05, "loss": 106.965, "step": 4870, "task_loss": 2.2308053970336914 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9744626292098001, "compression/movement_sparsity/importance_threshold": -0.0001788569730041302, "compression/movement_sparsity/linear_layer_sparsity": 0.906255401647939, "compression/movement_sparsity/model_sparsity": 0.8751227406336188, "compression_loss": 103.43228912353516, "distillation_loss": 4.086702346801758, "epoch": 4.12, "learning_rate": 3.268056729595191e-05, "loss": 107.0643, "step": 4871, "task_loss": 2.674440383911133 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9745358021474425, "compression/movement_sparsity/importance_threshold": -0.00017834448915291217, "compression/movement_sparsity/linear_layer_sparsity": 0.9061897591051036, "compression/movement_sparsity/model_sparsity": 0.8750593531140699, "compression_loss": 103.43948364257812, "distillation_loss": 6.035675048828125, "epoch": 4.12, "learning_rate": 3.2675871137409605e-05, "loss": 108.6245, "step": 4872, "task_loss": 3.1565182209014893 }, { "compression/movement_sparsity/importance_regularization_factor": 0.974608835175113, "compression/movement_sparsity/importance_threshold": -0.00017783298519403676, "compression/movement_sparsity/linear_layer_sparsity": 0.9062934993635356, "compression/movement_sparsity/model_sparsity": 0.8751595295754825, "compression_loss": 103.44657897949219, "distillation_loss": 4.713630676269531, "epoch": 4.12, "learning_rate": 3.2671174978867285e-05, "loss": 107.5689, "step": 4873, "task_loss": 2.0402321815490723 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9746817284266966, "compression/movement_sparsity/importance_threshold": -0.000177322460189807, "compression/movement_sparsity/linear_layer_sparsity": 0.9063929230732835, "compression/movement_sparsity/model_sparsity": 0.8752555377749374, "compression_loss": 103.45375061035156, "distillation_loss": 3.904275894165039, "epoch": 4.12, "learning_rate": 3.266647882032498e-05, "loss": 107.3187, "step": 4874, "task_loss": 2.783564805984497 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9747544820360787, "compression/movement_sparsity/importance_threshold": -0.00017681291320252677, "compression/movement_sparsity/linear_layer_sparsity": 0.9064312354238975, "compression/movement_sparsity/model_sparsity": 0.8752925339784452, "compression_loss": 103.4608383178711, "distillation_loss": 4.348126411437988, "epoch": 4.12, "learning_rate": 3.2661782661782664e-05, "loss": 107.9443, "step": 4875, "task_loss": 2.3940958976745605 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9748270961371442, "compression/movement_sparsity/importance_threshold": -0.00017630434329450086, "compression/movement_sparsity/linear_layer_sparsity": 0.9064873863292947, "compression/movement_sparsity/model_sparsity": 0.8753467559275017, "compression_loss": 103.46794891357422, "distillation_loss": 3.9418463706970215, "epoch": 4.12, "learning_rate": 3.265708650324035e-05, "loss": 107.9741, "step": 4876, "task_loss": 1.3091652393341064 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9748995708637783, "compression/movement_sparsity/importance_threshold": -0.00017579674952803315, "compression/movement_sparsity/linear_layer_sparsity": 0.9065482949775787, "compression/movement_sparsity/model_sparsity": 0.8754055721763402, "compression_loss": 103.47506713867188, "distillation_loss": 4.854995250701904, "epoch": 4.12, "learning_rate": 3.265239034469804e-05, "loss": 107.8088, "step": 4877, "task_loss": 3.5786190032958984 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9749719063498662, "compression/movement_sparsity/importance_threshold": -0.00017529013096542756, "compression/movement_sparsity/linear_layer_sparsity": 0.9066035157979004, "compression/movement_sparsity/model_sparsity": 0.8754588959916048, "compression_loss": 103.48211669921875, "distillation_loss": 6.065943717956543, "epoch": 4.12, "learning_rate": 3.2647694186155723e-05, "loss": 108.0226, "step": 4878, "task_loss": 4.0540385246276855 }, { "compression/movement_sparsity/importance_regularization_factor": 0.975044102729293, "compression/movement_sparsity/importance_threshold": -0.00017478448666898797, "compression/movement_sparsity/linear_layer_sparsity": 0.9066356275813439, "compression/movement_sparsity/model_sparsity": 0.8754899046364994, "compression_loss": 103.48915100097656, "distillation_loss": 4.033247470855713, "epoch": 4.12, "learning_rate": 3.2642998027613417e-05, "loss": 107.1277, "step": 4879, "task_loss": 1.6673756837844849 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9751161601359437, "compression/movement_sparsity/importance_threshold": -0.00017427981570101916, "compression/movement_sparsity/linear_layer_sparsity": 0.9067174035229906, "compression/movement_sparsity/model_sparsity": 0.8755688713229783, "compression_loss": 103.49620819091797, "distillation_loss": 5.9304914474487305, "epoch": 4.13, "learning_rate": 3.26383018690711e-05, "loss": 107.9008, "step": 4880, "task_loss": 2.5920910835266113 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9751880787037037, "compression/movement_sparsity/importance_threshold": -0.0001737761171238224, "compression/movement_sparsity/linear_layer_sparsity": 0.9067533191159097, "compression/movement_sparsity/model_sparsity": 0.8756035531047915, "compression_loss": 103.50323486328125, "distillation_loss": 6.632582664489746, "epoch": 4.13, "learning_rate": 3.263360571052879e-05, "loss": 109.2561, "step": 4881, "task_loss": 2.338265895843506 }, { "compression/movement_sparsity/importance_regularization_factor": 0.975259858566458, "compression/movement_sparsity/importance_threshold": -0.0001732733899997051, "compression/movement_sparsity/linear_layer_sparsity": 0.9067915956940209, "compression/movement_sparsity/model_sparsity": 0.875640514764692, "compression_loss": 103.51026916503906, "distillation_loss": 3.9054248332977295, "epoch": 4.13, "learning_rate": 3.2628909551986476e-05, "loss": 108.0137, "step": 4882, "task_loss": 2.698185682296753 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9753314998580918, "compression/movement_sparsity/importance_threshold": -0.0001727716333909694, "compression/movement_sparsity/linear_layer_sparsity": 0.9067980824412148, "compression/movement_sparsity/model_sparsity": 0.8756467786721642, "compression_loss": 103.51720428466797, "distillation_loss": 4.495738506317139, "epoch": 4.13, "learning_rate": 3.262421339344416e-05, "loss": 107.8718, "step": 4883, "task_loss": 3.1437809467315674 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9754030027124901, "compression/movement_sparsity/importance_threshold": -0.0001722708463599192, "compression/movement_sparsity/linear_layer_sparsity": 0.9068137269491531, "compression/movement_sparsity/model_sparsity": 0.8756618857431266, "compression_loss": 103.5242691040039, "distillation_loss": 5.174014091491699, "epoch": 4.13, "learning_rate": 3.2619517234901855e-05, "loss": 107.2893, "step": 4884, "task_loss": 3.178811550140381 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9754743672635382, "compression/movement_sparsity/importance_threshold": -0.00017177102796885925, "compression/movement_sparsity/linear_layer_sparsity": 0.9068892903994615, "compression/movement_sparsity/model_sparsity": 0.8757348533564566, "compression_loss": 103.53117370605469, "distillation_loss": 3.480297088623047, "epoch": 4.13, "learning_rate": 3.261482107635954e-05, "loss": 107.0925, "step": 4885, "task_loss": 1.2061827182769775 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9755455936451212, "compression/movement_sparsity/importance_threshold": -0.00017127217728009347, "compression/movement_sparsity/linear_layer_sparsity": 0.9069658912523543, "compression/movement_sparsity/model_sparsity": 0.8758088227344007, "compression_loss": 103.5381088256836, "distillation_loss": 4.13878870010376, "epoch": 4.13, "learning_rate": 3.261012491781723e-05, "loss": 108.2381, "step": 4886, "task_loss": 2.4863176345825195 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9756166819911242, "compression/movement_sparsity/importance_threshold": -0.0001707742933559249, "compression/movement_sparsity/linear_layer_sparsity": 0.9070317365060395, "compression/movement_sparsity/model_sparsity": 0.8758724060010582, "compression_loss": 103.54505157470703, "distillation_loss": 4.562363147735596, "epoch": 4.13, "learning_rate": 3.2605428759274914e-05, "loss": 107.5238, "step": 4887, "task_loss": 2.006821870803833 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9756876324354323, "compression/movement_sparsity/importance_threshold": -0.00017027737525865828, "compression/movement_sparsity/linear_layer_sparsity": 0.9071191048823075, "compression/movement_sparsity/model_sparsity": 0.8759567730048249, "compression_loss": 103.55194854736328, "distillation_loss": 4.552233695983887, "epoch": 4.13, "learning_rate": 3.26007326007326e-05, "loss": 107.9583, "step": 4888, "task_loss": 2.208235025405884 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9757584451119308, "compression/movement_sparsity/importance_threshold": -0.0001697814220505984, "compression/movement_sparsity/linear_layer_sparsity": 0.9071163027029131, "compression/movement_sparsity/model_sparsity": 0.8759540670889131, "compression_loss": 103.55878448486328, "distillation_loss": 3.906327486038208, "epoch": 4.13, "learning_rate": 3.2596036442190294e-05, "loss": 107.3075, "step": 4889, "task_loss": 1.4058661460876465 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9758291201545047, "compression/movement_sparsity/importance_threshold": -0.00016928643279404652, "compression/movement_sparsity/linear_layer_sparsity": 0.9070737691969559, "compression/movement_sparsity/model_sparsity": 0.875912994739734, "compression_loss": 103.56568908691406, "distillation_loss": 5.188601493835449, "epoch": 4.13, "learning_rate": 3.259134028364797e-05, "loss": 108.3778, "step": 4890, "task_loss": 3.2189619541168213 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9758996576970391, "compression/movement_sparsity/importance_threshold": -0.00016879240655131005, "compression/movement_sparsity/linear_layer_sparsity": 0.9071194387590014, "compression/movement_sparsity/model_sparsity": 0.8759570954118271, "compression_loss": 103.57255554199219, "distillation_loss": 2.815682888031006, "epoch": 4.13, "learning_rate": 3.2586644125105666e-05, "loss": 107.6905, "step": 4891, "task_loss": 1.708917498588562 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9759700578734194, "compression/movement_sparsity/importance_threshold": -0.00016829934238469027, "compression/movement_sparsity/linear_layer_sparsity": 0.9072219031314963, "compression/movement_sparsity/model_sparsity": 0.8760560398179096, "compression_loss": 103.57942962646484, "distillation_loss": 3.949026584625244, "epoch": 4.14, "learning_rate": 3.258194796656335e-05, "loss": 108.15, "step": 4892, "task_loss": 2.611680507659912 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9760403208175304, "compression/movement_sparsity/importance_threshold": -0.0001678072393564928, "compression/movement_sparsity/linear_layer_sparsity": 0.9071974108911722, "compression/movement_sparsity/model_sparsity": 0.8760323889613876, "compression_loss": 103.58628845214844, "distillation_loss": 3.855121612548828, "epoch": 4.14, "learning_rate": 3.257725180802104e-05, "loss": 108.0727, "step": 4893, "task_loss": 2.789794445037842 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9761104466632575, "compression/movement_sparsity/importance_threshold": -0.0001673160965290207, "compression/movement_sparsity/linear_layer_sparsity": 0.9072110879114506, "compression/movement_sparsity/model_sparsity": 0.876045596133944, "compression_loss": 103.5931625366211, "distillation_loss": 4.876151084899902, "epoch": 4.14, "learning_rate": 3.2572555649478725e-05, "loss": 107.6805, "step": 4894, "task_loss": 3.3561418056488037 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9761804355444857, "compression/movement_sparsity/importance_threshold": -0.00016682591296457872, "compression/movement_sparsity/linear_layer_sparsity": 0.9072490544612032, "compression/movement_sparsity/model_sparsity": 0.8760822584159138, "compression_loss": 103.60001373291016, "distillation_loss": 3.6320643424987793, "epoch": 4.14, "learning_rate": 3.256785949093641e-05, "loss": 107.8973, "step": 4895, "task_loss": 2.39764666557312 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9762502875951002, "compression/movement_sparsity/importance_threshold": -0.0001663366877254699, "compression/movement_sparsity/linear_layer_sparsity": 0.907303810238987, "compression/movement_sparsity/model_sparsity": 0.8761351331642823, "compression_loss": 103.60682678222656, "distillation_loss": 3.4754083156585693, "epoch": 4.14, "learning_rate": 3.2563163332394105e-05, "loss": 107.8774, "step": 4896, "task_loss": 2.050096273422241 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9763200029489862, "compression/movement_sparsity/importance_threshold": -0.0001658484198739981, "compression/movement_sparsity/linear_layer_sparsity": 0.9073405366753055, "compression/movement_sparsity/model_sparsity": 0.8761705979345294, "compression_loss": 103.61361694335938, "distillation_loss": 3.674201250076294, "epoch": 4.14, "learning_rate": 3.255846717385179e-05, "loss": 107.8859, "step": 4897, "task_loss": 2.7391393184661865 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9763895817400288, "compression/movement_sparsity/importance_threshold": -0.000165361108472469, "compression/movement_sparsity/linear_layer_sparsity": 0.9073619286320442, "compression/movement_sparsity/model_sparsity": 0.8761912550117449, "compression_loss": 103.6203842163086, "distillation_loss": 6.105525016784668, "epoch": 4.14, "learning_rate": 3.2553771015309484e-05, "loss": 108.0989, "step": 4898, "task_loss": 3.5748777389526367 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9764590241021129, "compression/movement_sparsity/importance_threshold": -0.00016487475258318476, "compression/movement_sparsity/linear_layer_sparsity": 0.9073730538804485, "compression/movement_sparsity/model_sparsity": 0.8762019980736412, "compression_loss": 103.6270751953125, "distillation_loss": 4.935971736907959, "epoch": 4.14, "learning_rate": 3.2549074856767164e-05, "loss": 108.6442, "step": 4899, "task_loss": 2.0858206748962402 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9765283301691241, "compression/movement_sparsity/importance_threshold": -0.00016438935126845012, "compression/movement_sparsity/linear_layer_sparsity": 0.9074356676847044, "compression/movement_sparsity/model_sparsity": 0.8762624609010983, "compression_loss": 103.63385009765625, "distillation_loss": 4.4408488273620605, "epoch": 4.14, "learning_rate": 3.254437869822485e-05, "loss": 107.5947, "step": 4900, "task_loss": 2.2618634700775146 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9765975000749472, "compression/movement_sparsity/importance_threshold": -0.000163904903590569, "compression/movement_sparsity/linear_layer_sparsity": 0.9075383705405521, "compression/movement_sparsity/model_sparsity": 0.8763616355978967, "compression_loss": 103.64054870605469, "distillation_loss": 3.311948299407959, "epoch": 4.14, "learning_rate": 3.253968253968254e-05, "loss": 107.9093, "step": 4901, "task_loss": 2.1013102531433105 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9766665339534673, "compression/movement_sparsity/importance_threshold": -0.00016342140861184701, "compression/movement_sparsity/linear_layer_sparsity": 0.9075510697790843, "compression/movement_sparsity/model_sparsity": 0.8763738985785179, "compression_loss": 103.64723205566406, "distillation_loss": 3.713339328765869, "epoch": 4.14, "learning_rate": 3.253498638114023e-05, "loss": 107.8918, "step": 4902, "task_loss": 2.350912570953369 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9767354319385698, "compression/movement_sparsity/importance_threshold": -0.0001629388653945846, "compression/movement_sparsity/linear_layer_sparsity": 0.9075860672110956, "compression/movement_sparsity/model_sparsity": 0.8764076937410749, "compression_loss": 103.65388488769531, "distillation_loss": 5.604605674743652, "epoch": 4.14, "learning_rate": 3.2530290222597916e-05, "loss": 108.2154, "step": 4903, "task_loss": 2.82411789894104 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9768041941641397, "compression/movement_sparsity/importance_threshold": -0.00016245727300108828, "compression/movement_sparsity/linear_layer_sparsity": 0.9076391178329075, "compression/movement_sparsity/model_sparsity": 0.8764589219108248, "compression_loss": 103.66053771972656, "distillation_loss": 4.7300333976745605, "epoch": 4.15, "learning_rate": 3.25255940640556e-05, "loss": 108.0324, "step": 4904, "task_loss": 2.0891354084014893 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9768728207640621, "compression/movement_sparsity/importance_threshold": -0.00016197663049366106, "compression/movement_sparsity/linear_layer_sparsity": 0.9077004080545559, "compression/movement_sparsity/model_sparsity": 0.8765181066248087, "compression_loss": 103.667236328125, "distillation_loss": 2.6300272941589355, "epoch": 4.15, "learning_rate": 3.2520897905513295e-05, "loss": 107.447, "step": 4905, "task_loss": 1.883621335029602 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9769413118722222, "compression/movement_sparsity/importance_threshold": -0.0001614969369346077, "compression/movement_sparsity/linear_layer_sparsity": 0.9077660863698942, "compression/movement_sparsity/model_sparsity": 0.876581528687965, "compression_loss": 103.6739273071289, "distillation_loss": 6.206882476806641, "epoch": 4.15, "learning_rate": 3.251620174697098e-05, "loss": 108.4054, "step": 4906, "task_loss": 3.441716194152832 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9770096676225052, "compression/movement_sparsity/importance_threshold": -0.00016101819138623213, "compression/movement_sparsity/linear_layer_sparsity": 0.9077905905343858, "compression/movement_sparsity/model_sparsity": 0.8766051910590228, "compression_loss": 103.68055725097656, "distillation_loss": 4.013655662536621, "epoch": 4.15, "learning_rate": 3.251150558842866e-05, "loss": 107.3144, "step": 4907, "task_loss": 1.9541929960250854 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9770778881487961, "compression/movement_sparsity/importance_threshold": -0.00016054039291083736, "compression/movement_sparsity/linear_layer_sparsity": 0.9078108377710316, "compression/movement_sparsity/model_sparsity": 0.8766247427408019, "compression_loss": 103.68716430664062, "distillation_loss": 5.0958967208862305, "epoch": 4.15, "learning_rate": 3.2506809429886354e-05, "loss": 107.7633, "step": 4908, "task_loss": 2.7743234634399414 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9771459735849801, "compression/movement_sparsity/importance_threshold": -0.00016006354057072816, "compression/movement_sparsity/linear_layer_sparsity": 0.9078627079002476, "compression/movement_sparsity/model_sparsity": 0.8766748309715082, "compression_loss": 103.6937255859375, "distillation_loss": 4.230184555053711, "epoch": 4.15, "learning_rate": 3.250211327134404e-05, "loss": 107.8555, "step": 4909, "task_loss": 2.480123519897461 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9772139240649425, "compression/movement_sparsity/importance_threshold": -0.00015958763342820842, "compression/movement_sparsity/linear_layer_sparsity": 0.9078116605385984, "compression/movement_sparsity/model_sparsity": 0.8766255372437718, "compression_loss": 103.70024871826172, "distillation_loss": 5.658873558044434, "epoch": 4.15, "learning_rate": 3.2497417112801734e-05, "loss": 108.2364, "step": 4910, "task_loss": 3.0606720447540283 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9772817397225682, "compression/movement_sparsity/importance_threshold": -0.00015911267054558118, "compression/movement_sparsity/linear_layer_sparsity": 0.9078693377374532, "compression/movement_sparsity/model_sparsity": 0.87668123305341, "compression_loss": 103.70674896240234, "distillation_loss": 4.8780975341796875, "epoch": 4.15, "learning_rate": 3.249272095425941e-05, "loss": 108.5508, "step": 4911, "task_loss": 3.2342422008514404 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9773494206917425, "compression/movement_sparsity/importance_threshold": -0.00015863865098515208, "compression/movement_sparsity/linear_layer_sparsity": 0.9079308783666218, "compression/movement_sparsity/model_sparsity": 0.8767406595726456, "compression_loss": 103.71328735351562, "distillation_loss": 4.006883144378662, "epoch": 4.15, "learning_rate": 3.2488024795717106e-05, "loss": 108.1279, "step": 4912, "task_loss": 1.8937087059020996 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9774169671063503, "compression/movement_sparsity/importance_threshold": -0.00015816557380922414, "compression/movement_sparsity/linear_layer_sparsity": 0.907959198264757, "compression/movement_sparsity/model_sparsity": 0.8767680065951576, "compression_loss": 103.71977233886719, "distillation_loss": 5.814964294433594, "epoch": 4.15, "learning_rate": 3.248332863717479e-05, "loss": 108.0557, "step": 4913, "task_loss": 3.5787041187286377 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9774843791002771, "compression/movement_sparsity/importance_threshold": -0.00015769343808010126, "compression/movement_sparsity/linear_layer_sparsity": 0.9080090770579777, "compression/movement_sparsity/model_sparsity": 0.8768161718983862, "compression_loss": 103.72622680664062, "distillation_loss": 4.371733665466309, "epoch": 4.15, "learning_rate": 3.247863247863248e-05, "loss": 107.9525, "step": 4914, "task_loss": 1.6969681978225708 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9775516568074077, "compression/movement_sparsity/importance_threshold": -0.0001572222428600882, "compression/movement_sparsity/linear_layer_sparsity": 0.9080994860969929, "compression/movement_sparsity/model_sparsity": 0.8769034751087804, "compression_loss": 103.73272705078125, "distillation_loss": 3.0055060386657715, "epoch": 4.15, "learning_rate": 3.247393632009017e-05, "loss": 107.7652, "step": 4915, "task_loss": 1.1817177534103394 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9776188003616275, "compression/movement_sparsity/importance_threshold": -0.00015675198721148802, "compression/movement_sparsity/linear_layer_sparsity": 0.9081801530910495, "compression/movement_sparsity/model_sparsity": 0.8769813709434304, "compression_loss": 103.73918151855469, "distillation_loss": 5.603509902954102, "epoch": 4.16, "learning_rate": 3.246924016154785e-05, "loss": 108.0408, "step": 4916, "task_loss": 3.8461949825286865 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9776858098968216, "compression/movement_sparsity/importance_threshold": -0.00015628267019660459, "compression/movement_sparsity/linear_layer_sparsity": 0.9082577555740237, "compression/movement_sparsity/model_sparsity": 0.8770563075423814, "compression_loss": 103.74560546875, "distillation_loss": 4.520080089569092, "epoch": 4.16, "learning_rate": 3.2464544003005545e-05, "loss": 107.5718, "step": 4917, "task_loss": 2.4631948471069336 }, { "compression/movement_sparsity/importance_regularization_factor": 0.977752685546875, "compression/movement_sparsity/importance_threshold": -0.00015581429087774268, "compression/movement_sparsity/linear_layer_sparsity": 0.9083243043535995, "compression/movement_sparsity/model_sparsity": 0.8771205701666507, "compression_loss": 103.75205993652344, "distillation_loss": 4.112934112548828, "epoch": 4.16, "learning_rate": 3.245984784446323e-05, "loss": 107.8388, "step": 4918, "task_loss": 3.36824369430542 }, { "compression/movement_sparsity/importance_regularization_factor": 0.977819427445673, "compression/movement_sparsity/importance_threshold": -0.0001553468483172062, "compression/movement_sparsity/linear_layer_sparsity": 0.9084005474814631, "compression/movement_sparsity/model_sparsity": 0.877194194108521, "compression_loss": 103.75852966308594, "distillation_loss": 3.742875576019287, "epoch": 4.16, "learning_rate": 3.245515168592092e-05, "loss": 108.1045, "step": 4919, "task_loss": 1.465295672416687 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9778860357271005, "compression/movement_sparsity/importance_threshold": -0.00015488034157729817, "compression/movement_sparsity/linear_layer_sparsity": 0.9084050667409972, "compression/movement_sparsity/model_sparsity": 0.8771985581175871, "compression_loss": 103.76490783691406, "distillation_loss": 4.877608299255371, "epoch": 4.16, "learning_rate": 3.2450455527378604e-05, "loss": 107.9625, "step": 4920, "task_loss": 2.666753053665161 }, { "compression/movement_sparsity/importance_regularization_factor": 0.977952510525043, "compression/movement_sparsity/importance_threshold": -0.00015441476972032336, "compression/movement_sparsity/linear_layer_sparsity": 0.9083827923958534, "compression/movement_sparsity/model_sparsity": 0.8771770489647229, "compression_loss": 103.7712631225586, "distillation_loss": 3.0660858154296875, "epoch": 4.16, "learning_rate": 3.244575936883629e-05, "loss": 107.9188, "step": 4921, "task_loss": 2.8454513549804688 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9780188519733853, "compression/movement_sparsity/importance_threshold": -0.00015395013180858567, "compression/movement_sparsity/linear_layer_sparsity": 0.9084047924851415, "compression/movement_sparsity/model_sparsity": 0.8771982932832638, "compression_loss": 103.77765655517578, "distillation_loss": 5.5451860427856445, "epoch": 4.16, "learning_rate": 3.2441063210293983e-05, "loss": 109.4399, "step": 4922, "task_loss": 3.351867914199829 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9780850602060127, "compression/movement_sparsity/importance_threshold": -0.00015348642690438986, "compression/movement_sparsity/linear_layer_sparsity": 0.9084170386053035, "compression/movement_sparsity/model_sparsity": 0.8772101187115249, "compression_loss": 103.78401947021484, "distillation_loss": 4.588109016418457, "epoch": 4.16, "learning_rate": 3.243636705175167e-05, "loss": 107.4691, "step": 4923, "task_loss": 2.8633599281311035 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9781511353568103, "compression/movement_sparsity/importance_threshold": -0.00015302365407003898, "compression/movement_sparsity/linear_layer_sparsity": 0.9084920416197332, "compression/movement_sparsity/model_sparsity": 0.8772825451416725, "compression_loss": 103.7903823852539, "distillation_loss": 4.177319049835205, "epoch": 4.16, "learning_rate": 3.2431670893209356e-05, "loss": 108.6148, "step": 4924, "task_loss": 1.9912488460540771 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9782170775596634, "compression/movement_sparsity/importance_threshold": -0.0001525618123678369, "compression/movement_sparsity/linear_layer_sparsity": 0.9085561697932788, "compression/movement_sparsity/model_sparsity": 0.8773444703151755, "compression_loss": 103.7967758178711, "distillation_loss": 3.715540885925293, "epoch": 4.16, "learning_rate": 3.242697473466704e-05, "loss": 107.8677, "step": 4925, "task_loss": 1.77749764919281 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9782828869484569, "compression/movement_sparsity/importance_threshold": -0.00015210090086008755, "compression/movement_sparsity/linear_layer_sparsity": 0.9086346904371609, "compression/movement_sparsity/model_sparsity": 0.8774202935333826, "compression_loss": 103.8031234741211, "distillation_loss": 5.273666858673096, "epoch": 4.16, "learning_rate": 3.242227857612473e-05, "loss": 108.6352, "step": 4926, "task_loss": 2.6297898292541504 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9783485636570761, "compression/movement_sparsity/importance_threshold": -0.0001516409186090948, "compression/movement_sparsity/linear_layer_sparsity": 0.9087253141111935, "compression/movement_sparsity/model_sparsity": 0.877507804005421, "compression_loss": 103.80950164794922, "distillation_loss": 5.81418514251709, "epoch": 4.16, "learning_rate": 3.241758241758242e-05, "loss": 108.263, "step": 4927, "task_loss": 2.356076240539551 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9784141078194061, "compression/movement_sparsity/importance_threshold": -0.00015118186467716344, "compression/movement_sparsity/linear_layer_sparsity": 0.9087867712711888, "compression/movement_sparsity/model_sparsity": 0.8775671499229062, "compression_loss": 103.81583404541016, "distillation_loss": 4.095522880554199, "epoch": 4.17, "learning_rate": 3.241288625904011e-05, "loss": 108.1185, "step": 4928, "task_loss": 1.751255750656128 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9784795195693319, "compression/movement_sparsity/importance_threshold": -0.00015072373812659821, "compression/movement_sparsity/linear_layer_sparsity": 0.9087989816188479, "compression/movement_sparsity/model_sparsity": 0.8775789408075597, "compression_loss": 103.82217407226562, "distillation_loss": 4.528531074523926, "epoch": 4.17, "learning_rate": 3.2408190100497795e-05, "loss": 108.7427, "step": 4929, "task_loss": 2.6554863452911377 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9785447990407389, "compression/movement_sparsity/importance_threshold": -0.00015026653801970043, "compression/movement_sparsity/linear_layer_sparsity": 0.9087965490886502, "compression/movement_sparsity/model_sparsity": 0.8775765918422577, "compression_loss": 103.82848358154297, "distillation_loss": 4.656617164611816, "epoch": 4.17, "learning_rate": 3.240349394195548e-05, "loss": 108.6566, "step": 4930, "task_loss": 2.644467353820801 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9786099463675121, "compression/movement_sparsity/importance_threshold": -0.00014981026341877573, "compression/movement_sparsity/linear_layer_sparsity": 0.9088196938980314, "compression/movement_sparsity/model_sparsity": 0.8775989415562349, "compression_loss": 103.83478546142578, "distillation_loss": 4.464627742767334, "epoch": 4.17, "learning_rate": 3.239879778341317e-05, "loss": 108.2843, "step": 4931, "task_loss": 2.8621952533721924 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9786749616835365, "compression/movement_sparsity/importance_threshold": -0.00014935491338612886, "compression/movement_sparsity/linear_layer_sparsity": 0.9088313438098117, "compression/movement_sparsity/model_sparsity": 0.8776101912577061, "compression_loss": 103.84107208251953, "distillation_loss": 4.504247188568115, "epoch": 4.17, "learning_rate": 3.239410162487086e-05, "loss": 108.6984, "step": 4932, "task_loss": 3.1847739219665527 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9787398451226975, "compression/movement_sparsity/importance_threshold": -0.000148900486984062, "compression/movement_sparsity/linear_layer_sparsity": 0.9088649461142095, "compression/movement_sparsity/model_sparsity": 0.8776426392195752, "compression_loss": 103.84729766845703, "distillation_loss": 4.58244514465332, "epoch": 4.17, "learning_rate": 3.238940546632854e-05, "loss": 107.6181, "step": 4933, "task_loss": 2.603705883026123 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9788045968188802, "compression/movement_sparsity/importance_threshold": -0.00014844698327487905, "compression/movement_sparsity/linear_layer_sparsity": 0.9088454858726278, "compression/movement_sparsity/model_sparsity": 0.8776238474971585, "compression_loss": 103.8534927368164, "distillation_loss": 3.9984004497528076, "epoch": 4.17, "learning_rate": 3.238470930778623e-05, "loss": 107.2262, "step": 4934, "task_loss": 1.6735730171203613 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9788692169059696, "compression/movement_sparsity/importance_threshold": -0.0001479944013208865, "compression/movement_sparsity/linear_layer_sparsity": 0.9089138471256841, "compression/movement_sparsity/model_sparsity": 0.8776898603308686, "compression_loss": 103.85972595214844, "distillation_loss": 5.435490131378174, "epoch": 4.17, "learning_rate": 3.238001314924392e-05, "loss": 108.7948, "step": 4935, "task_loss": 2.7765865325927734 }, { "compression/movement_sparsity/importance_regularization_factor": 0.978933705517851, "compression/movement_sparsity/importance_threshold": -0.00014754274018438564, "compression/movement_sparsity/linear_layer_sparsity": 0.9089759243423965, "compression/movement_sparsity/model_sparsity": 0.877749805004215, "compression_loss": 103.86590576171875, "distillation_loss": 3.8262877464294434, "epoch": 4.17, "learning_rate": 3.237531699070161e-05, "loss": 108.09, "step": 4936, "task_loss": 1.668071985244751 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9789980627884093, "compression/movement_sparsity/importance_threshold": -0.000147091998927683, "compression/movement_sparsity/linear_layer_sparsity": 0.9090050670080985, "compression/movement_sparsity/model_sparsity": 0.8777779465296968, "compression_loss": 103.87210083007812, "distillation_loss": 3.899608612060547, "epoch": 4.17, "learning_rate": 3.237062083215929e-05, "loss": 107.5575, "step": 4937, "task_loss": 2.410928964614868 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9790622888515299, "compression/movement_sparsity/importance_threshold": -0.00014664217661307984, "compression/movement_sparsity/linear_layer_sparsity": 0.9089711665995097, "compression/movement_sparsity/model_sparsity": 0.8777452107044329, "compression_loss": 103.87832641601562, "distillation_loss": 6.1531877517700195, "epoch": 4.17, "learning_rate": 3.2365924673616985e-05, "loss": 109.1951, "step": 4938, "task_loss": 3.091554641723633 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9791263838410977, "compression/movement_sparsity/importance_threshold": -0.00014619327230288182, "compression/movement_sparsity/linear_layer_sparsity": 0.9089893748034897, "compression/movement_sparsity/model_sparsity": 0.8777627934005912, "compression_loss": 103.88449096679688, "distillation_loss": 3.6427648067474365, "epoch": 4.17, "learning_rate": 3.236122851507467e-05, "loss": 108.0407, "step": 4939, "task_loss": 1.7808279991149902 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9791903478909981, "compression/movement_sparsity/importance_threshold": -0.00014574528505939283, "compression/movement_sparsity/linear_layer_sparsity": 0.9088964736134387, "compression/movement_sparsity/model_sparsity": 0.877673083652216, "compression_loss": 103.89066314697266, "distillation_loss": 4.707060813903809, "epoch": 4.18, "learning_rate": 3.235653235653236e-05, "loss": 108.5641, "step": 4940, "task_loss": 1.8706927299499512 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9792541811351161, "compression/movement_sparsity/importance_threshold": -0.0001452982139449159, "compression/movement_sparsity/linear_layer_sparsity": 0.9089289550460788, "compression/movement_sparsity/model_sparsity": 0.8777044492477203, "compression_loss": 103.89676666259766, "distillation_loss": 6.042444705963135, "epoch": 4.18, "learning_rate": 3.2351836197990044e-05, "loss": 108.3449, "step": 4941, "task_loss": 2.5021352767944336 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9793178837073369, "compression/movement_sparsity/importance_threshold": -0.0001448520580217558, "compression/movement_sparsity/linear_layer_sparsity": 0.9088778242152562, "compression/movement_sparsity/model_sparsity": 0.8776550749182332, "compression_loss": 103.90290069580078, "distillation_loss": 4.144269943237305, "epoch": 4.18, "learning_rate": 3.234714003944773e-05, "loss": 107.3744, "step": 4942, "task_loss": 2.4927775859832764 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9793814557415456, "compression/movement_sparsity/importance_threshold": -0.00014440681635221643, "compression/movement_sparsity/linear_layer_sparsity": 0.9089402830053329, "compression/movement_sparsity/model_sparsity": 0.8777153880567251, "compression_loss": 103.90899658203125, "distillation_loss": 4.880977630615234, "epoch": 4.18, "learning_rate": 3.2342443880905424e-05, "loss": 108.66, "step": 4943, "task_loss": 2.876920223236084 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9794448973716272, "compression/movement_sparsity/importance_threshold": -0.00014396248799860168, "compression/movement_sparsity/linear_layer_sparsity": 0.9089814929286825, "compression/movement_sparsity/model_sparsity": 0.877755182292431, "compression_loss": 103.91514587402344, "distillation_loss": 3.806028366088867, "epoch": 4.18, "learning_rate": 3.233774772236311e-05, "loss": 108.3256, "step": 4944, "task_loss": 1.8639618158340454 }, { "compression/movement_sparsity/importance_regularization_factor": 0.979508208731467, "compression/movement_sparsity/importance_threshold": -0.00014351907202321632, "compression/movement_sparsity/linear_layer_sparsity": 0.9089820056678908, "compression/movement_sparsity/model_sparsity": 0.8777556774174702, "compression_loss": 103.92124938964844, "distillation_loss": 6.151072025299072, "epoch": 4.18, "learning_rate": 3.2333051563820796e-05, "loss": 108.0019, "step": 4945, "task_loss": 3.5995988845825195 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9795713899549502, "compression/movement_sparsity/importance_threshold": -0.00014307656748836252, "compression/movement_sparsity/linear_layer_sparsity": 0.9090139743613225, "compression/movement_sparsity/model_sparsity": 0.8777865478879353, "compression_loss": 103.9273452758789, "distillation_loss": 5.25455379486084, "epoch": 4.18, "learning_rate": 3.232835540527848e-05, "loss": 108.6378, "step": 4946, "task_loss": 2.0346150398254395 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9796344411759619, "compression/movement_sparsity/importance_threshold": -0.00014263497345634504, "compression/movement_sparsity/linear_layer_sparsity": 0.9091227943151674, "compression/movement_sparsity/model_sparsity": 0.8778916295415963, "compression_loss": 103.93344116210938, "distillation_loss": 5.238717079162598, "epoch": 4.18, "learning_rate": 3.232365924673617e-05, "loss": 108.3677, "step": 4947, "task_loss": 2.6636807918548584 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9796973625283871, "compression/movement_sparsity/importance_threshold": -0.00014219428898946864, "compression/movement_sparsity/linear_layer_sparsity": 0.9091323813459466, "compression/movement_sparsity/model_sparsity": 0.8779008872283751, "compression_loss": 103.93955993652344, "distillation_loss": 3.691100597381592, "epoch": 4.18, "learning_rate": 3.231896308819386e-05, "loss": 108.4561, "step": 4948, "task_loss": 2.0553925037384033 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9797601541461111, "compression/movement_sparsity/importance_threshold": -0.00014175451315003723, "compression/movement_sparsity/linear_layer_sparsity": 0.9092285974546004, "compression/movement_sparsity/model_sparsity": 0.8779937980177013, "compression_loss": 103.94566345214844, "distillation_loss": 6.505134582519531, "epoch": 4.18, "learning_rate": 3.231426692965155e-05, "loss": 109.3271, "step": 4949, "task_loss": 3.639892339706421 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9798228161630189, "compression/movement_sparsity/importance_threshold": -0.00014131564500035384, "compression/movement_sparsity/linear_layer_sparsity": 0.9092975787643739, "compression/movement_sparsity/model_sparsity": 0.8780604096072727, "compression_loss": 103.95176696777344, "distillation_loss": 4.966371536254883, "epoch": 4.18, "learning_rate": 3.2309570771109235e-05, "loss": 108.7232, "step": 4950, "task_loss": 2.5394816398620605 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9798853487129957, "compression/movement_sparsity/importance_threshold": -0.0001408776836027241, "compression/movement_sparsity/linear_layer_sparsity": 0.9093315745663038, "compression/movement_sparsity/model_sparsity": 0.878093237548823, "compression_loss": 103.95781707763672, "distillation_loss": 3.184843063354492, "epoch": 4.19, "learning_rate": 3.230487461256692e-05, "loss": 108.213, "step": 4951, "task_loss": 1.816812515258789 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9799477519299268, "compression/movement_sparsity/importance_threshold": -0.0001404406280194493, "compression/movement_sparsity/linear_layer_sparsity": 0.9093538369872799, "compression/movement_sparsity/model_sparsity": 0.8781147351871513, "compression_loss": 103.96385955810547, "distillation_loss": 3.5028109550476074, "epoch": 4.19, "learning_rate": 3.230017845402461e-05, "loss": 108.2512, "step": 4952, "task_loss": 1.4506467580795288 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9800100259476971, "compression/movement_sparsity/importance_threshold": -0.00014000447731283509, "compression/movement_sparsity/linear_layer_sparsity": 0.9094300920393112, "compression/movement_sparsity/model_sparsity": 0.8781883706435574, "compression_loss": 103.96989440917969, "distillation_loss": 3.575268268585205, "epoch": 4.19, "learning_rate": 3.22954822954823e-05, "loss": 107.7554, "step": 4953, "task_loss": 1.6225048303604126 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9800721709001919, "compression/movement_sparsity/importance_threshold": -0.00013956923054518535, "compression/movement_sparsity/linear_layer_sparsity": 0.9094287446083683, "compression/movement_sparsity/model_sparsity": 0.8781870695010127, "compression_loss": 103.97588348388672, "distillation_loss": 4.586324691772461, "epoch": 4.19, "learning_rate": 3.229078613693998e-05, "loss": 108.4797, "step": 4954, "task_loss": 2.459829092025757 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9801341869212963, "compression/movement_sparsity/importance_threshold": -0.00013913488677880486, "compression/movement_sparsity/linear_layer_sparsity": 0.9094863621863849, "compression/movement_sparsity/model_sparsity": 0.8782427077379719, "compression_loss": 103.98187255859375, "distillation_loss": 4.677105903625488, "epoch": 4.19, "learning_rate": 3.228608997839767e-05, "loss": 108.5416, "step": 4955, "task_loss": 3.102701425552368 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9801960741448954, "compression/movement_sparsity/importance_threshold": -0.00013870144507599578, "compression/movement_sparsity/linear_layer_sparsity": 0.9094686547974455, "compression/movement_sparsity/model_sparsity": 0.8782256086523169, "compression_loss": 103.98786926269531, "distillation_loss": 3.6570844650268555, "epoch": 4.19, "learning_rate": 3.228139381985536e-05, "loss": 108.175, "step": 4956, "task_loss": 1.8535711765289307 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9802578327048743, "compression/movement_sparsity/importance_threshold": -0.00013826890449906287, "compression/movement_sparsity/linear_layer_sparsity": 0.9095566670787659, "compression/movement_sparsity/model_sparsity": 0.8783105974410165, "compression_loss": 103.99382781982422, "distillation_loss": 5.6279778480529785, "epoch": 4.19, "learning_rate": 3.2276697661313046e-05, "loss": 108.555, "step": 4957, "task_loss": 2.464423894882202 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9803194627351183, "compression/movement_sparsity/importance_threshold": -0.0001378372641103109, "compression/movement_sparsity/linear_layer_sparsity": 0.9096375964045105, "compression/movement_sparsity/model_sparsity": 0.8783887465954541, "compression_loss": 103.9997787475586, "distillation_loss": 5.5202484130859375, "epoch": 4.19, "learning_rate": 3.227200150277074e-05, "loss": 108.7242, "step": 4958, "task_loss": 2.882256507873535 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9803809643695124, "compression/movement_sparsity/importance_threshold": -0.00013740652297204378, "compression/movement_sparsity/linear_layer_sparsity": 0.909685126136707, "compression/movement_sparsity/model_sparsity": 0.878434643535131, "compression_loss": 104.00574493408203, "distillation_loss": 4.985022068023682, "epoch": 4.19, "learning_rate": 3.226730534422842e-05, "loss": 108.5123, "step": 4959, "task_loss": 2.5230209827423096 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9804423377419418, "compression/movement_sparsity/importance_threshold": -0.00013697668014656366, "compression/movement_sparsity/linear_layer_sparsity": 0.9096312169748253, "compression/movement_sparsity/model_sparsity": 0.878382586318804, "compression_loss": 104.0115737915039, "distillation_loss": 4.303328514099121, "epoch": 4.19, "learning_rate": 3.226260918568611e-05, "loss": 108.48, "step": 4960, "task_loss": 1.6808784008026123 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9805035829862916, "compression/movement_sparsity/importance_threshold": -0.00013654773469617704, "compression/movement_sparsity/linear_layer_sparsity": 0.9096204136789472, "compression/movement_sparsity/model_sparsity": 0.8783721541493741, "compression_loss": 104.0174789428711, "distillation_loss": 3.7154412269592285, "epoch": 4.19, "learning_rate": 3.22579130271438e-05, "loss": 108.1768, "step": 4961, "task_loss": 2.5633575916290283 }, { "compression/movement_sparsity/importance_regularization_factor": 0.980564700236447, "compression/movement_sparsity/importance_threshold": -0.00013611968568318523, "compression/movement_sparsity/linear_layer_sparsity": 0.909702809677311, "compression/movement_sparsity/model_sparsity": 0.8784517195917144, "compression_loss": 104.02338409423828, "distillation_loss": 3.5759315490722656, "epoch": 4.19, "learning_rate": 3.225321686860149e-05, "loss": 107.8621, "step": 4962, "task_loss": 2.4822640419006348 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9806256896262932, "compression/movement_sparsity/importance_threshold": -0.00013569253216989471, "compression/movement_sparsity/linear_layer_sparsity": 0.9099019909735004, "compression/movement_sparsity/model_sparsity": 0.8786440583976264, "compression_loss": 104.02925109863281, "distillation_loss": 3.9838249683380127, "epoch": 4.2, "learning_rate": 3.224852071005917e-05, "loss": 108.0341, "step": 4963, "task_loss": 2.170424699783325 }, { "compression/movement_sparsity/importance_regularization_factor": 0.980686551289715, "compression/movement_sparsity/importance_threshold": -0.00013526627321860853, "compression/movement_sparsity/linear_layer_sparsity": 0.9099067964130577, "compression/movement_sparsity/model_sparsity": 0.8786486987555516, "compression_loss": 104.03507995605469, "distillation_loss": 3.334512233734131, "epoch": 4.2, "learning_rate": 3.224382455151686e-05, "loss": 108.1032, "step": 4964, "task_loss": 2.5189616680145264 }, { "compression/movement_sparsity/importance_regularization_factor": 0.980747285360598, "compression/movement_sparsity/importance_threshold": -0.00013484090789163058, "compression/movement_sparsity/linear_layer_sparsity": 0.9099511304683279, "compression/movement_sparsity/model_sparsity": 0.8786915097996357, "compression_loss": 104.04090881347656, "distillation_loss": 3.6471285820007324, "epoch": 4.2, "learning_rate": 3.223912839297455e-05, "loss": 108.2061, "step": 4965, "task_loss": 1.9344518184661865 }, { "compression/movement_sparsity/importance_regularization_factor": 0.980807891972827, "compression/movement_sparsity/importance_threshold": -0.0001344164352512639, "compression/movement_sparsity/linear_layer_sparsity": 0.909972379335055, "compression/movement_sparsity/model_sparsity": 0.8787120287024216, "compression_loss": 104.04671478271484, "distillation_loss": 3.805270195007324, "epoch": 4.2, "learning_rate": 3.2234432234432237e-05, "loss": 108.5571, "step": 4966, "task_loss": 1.9280987977981567 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9808683712602874, "compression/movement_sparsity/importance_threshold": -0.0001339928543598141, "compression/movement_sparsity/linear_layer_sparsity": 0.909907702649798, "compression/movement_sparsity/model_sparsity": 0.878649573860272, "compression_loss": 104.05254364013672, "distillation_loss": 4.468003749847412, "epoch": 4.2, "learning_rate": 3.222973607588992e-05, "loss": 108.4487, "step": 4967, "task_loss": 2.7226998805999756 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9809287233568642, "compression/movement_sparsity/importance_threshold": -0.00013357016427958338, "compression/movement_sparsity/linear_layer_sparsity": 0.9099947848460428, "compression/movement_sparsity/model_sparsity": 0.8787336645151795, "compression_loss": 104.05830383300781, "distillation_loss": 5.158664703369141, "epoch": 4.2, "learning_rate": 3.222503991734761e-05, "loss": 108.6758, "step": 4968, "task_loss": 2.4551422595977783 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9809889483964425, "compression/movement_sparsity/importance_threshold": -0.00013314836407287822, "compression/movement_sparsity/linear_layer_sparsity": 0.9100253584118612, "compression/movement_sparsity/model_sparsity": 0.8787631877849567, "compression_loss": 104.0639877319336, "distillation_loss": 5.051945686340332, "epoch": 4.2, "learning_rate": 3.22203437588053e-05, "loss": 108.5161, "step": 4969, "task_loss": 2.524357557296753 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9810490465129075, "compression/movement_sparsity/importance_threshold": -0.0001327274528019999, "compression/movement_sparsity/linear_layer_sparsity": 0.9100611070664334, "compression/movement_sparsity/model_sparsity": 0.8787977083632688, "compression_loss": 104.06974029541016, "distillation_loss": 5.046250820159912, "epoch": 4.2, "learning_rate": 3.221564760026299e-05, "loss": 108.9531, "step": 4970, "task_loss": 3.820064067840576 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9811090178401443, "compression/movement_sparsity/importance_threshold": -0.00013230742952925496, "compression/movement_sparsity/linear_layer_sparsity": 0.9100581260245245, "compression/movement_sparsity/model_sparsity": 0.8787948297293201, "compression_loss": 104.075439453125, "distillation_loss": 3.7273898124694824, "epoch": 4.2, "learning_rate": 3.221095144172067e-05, "loss": 108.5203, "step": 4971, "task_loss": 2.4891371726989746 }, { "compression/movement_sparsity/importance_regularization_factor": 0.981168862512038, "compression/movement_sparsity/importance_threshold": -0.00013188829331694553, "compression/movement_sparsity/linear_layer_sparsity": 0.9100377118495319, "compression/movement_sparsity/model_sparsity": 0.8787751168440399, "compression_loss": 104.08113861083984, "distillation_loss": 3.618380069732666, "epoch": 4.2, "learning_rate": 3.220625528317836e-05, "loss": 108.8195, "step": 4972, "task_loss": 2.506181240081787 }, { "compression/movement_sparsity/importance_regularization_factor": 0.981228580662474, "compression/movement_sparsity/importance_threshold": -0.00013147004322737552, "compression/movement_sparsity/linear_layer_sparsity": 0.9100644816058744, "compression/movement_sparsity/model_sparsity": 0.8788009669768986, "compression_loss": 104.08683013916016, "distillation_loss": 4.888124465942383, "epoch": 4.2, "learning_rate": 3.220155912463605e-05, "loss": 108.679, "step": 4973, "task_loss": 2.743950843811035 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9812881724253372, "compression/movement_sparsity/importance_threshold": -0.0001310526783228497, "compression/movement_sparsity/linear_layer_sparsity": 0.9100372110344912, "compression/movement_sparsity/model_sparsity": 0.8787746332335364, "compression_loss": 104.09255981445312, "distillation_loss": 4.180861473083496, "epoch": 4.2, "learning_rate": 3.219686296609374e-05, "loss": 108.3442, "step": 4974, "task_loss": 2.295189142227173 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9813476379345127, "compression/movement_sparsity/importance_threshold": -0.00013063619766567197, "compression/movement_sparsity/linear_layer_sparsity": 0.9100385823137693, "compression/movement_sparsity/model_sparsity": 0.8787759574051528, "compression_loss": 104.09825134277344, "distillation_loss": 4.914698123931885, "epoch": 4.21, "learning_rate": 3.219216680755143e-05, "loss": 108.5093, "step": 4975, "task_loss": 2.9518239498138428 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9814069773238858, "compression/movement_sparsity/importance_threshold": -0.0001302206003181471, "compression/movement_sparsity/linear_layer_sparsity": 0.9100127068699995, "compression/movement_sparsity/model_sparsity": 0.8787509708624787, "compression_loss": 104.10387420654297, "distillation_loss": 4.6972761154174805, "epoch": 4.21, "learning_rate": 3.2187470649009114e-05, "loss": 108.6708, "step": 4976, "task_loss": 2.975581645965576 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9814661907273416, "compression/movement_sparsity/importance_threshold": -0.00012980588534257636, "compression/movement_sparsity/linear_layer_sparsity": 0.9100781586261527, "compression/movement_sparsity/model_sparsity": 0.878814174149455, "compression_loss": 104.10954284667969, "distillation_loss": 3.283097982406616, "epoch": 4.21, "learning_rate": 3.21827744904668e-05, "loss": 107.614, "step": 4977, "task_loss": 1.8064824342727661 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9815252782787652, "compression/movement_sparsity/importance_threshold": -0.00012939205180126714, "compression/movement_sparsity/linear_layer_sparsity": 0.9100707775663861, "compression/movement_sparsity/model_sparsity": 0.8788070466517981, "compression_loss": 104.11517333984375, "distillation_loss": 3.0536389350891113, "epoch": 4.21, "learning_rate": 3.2178078331924486e-05, "loss": 108.659, "step": 4978, "task_loss": 1.1758376359939575 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9815842401120418, "compression/movement_sparsity/importance_threshold": -0.00012897909875652074, "compression/movement_sparsity/linear_layer_sparsity": 0.9100524262723946, "compression/movement_sparsity/model_sparsity": 0.8787893257812103, "compression_loss": 104.12081909179688, "distillation_loss": 4.381033897399902, "epoch": 4.21, "learning_rate": 3.217338217338218e-05, "loss": 108.1671, "step": 4979, "task_loss": 2.1976146697998047 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9816430763610565, "compression/movement_sparsity/importance_threshold": -0.0001285670252706428, "compression/movement_sparsity/linear_layer_sparsity": 0.910067248012766, "compression/movement_sparsity/model_sparsity": 0.8788036383492029, "compression_loss": 104.12648010253906, "distillation_loss": 4.64116096496582, "epoch": 4.21, "learning_rate": 3.216868601483986e-05, "loss": 108.2276, "step": 4980, "task_loss": 2.452749729156494 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9817017871596944, "compression/movement_sparsity/importance_threshold": -0.0001281558304059363, "compression/movement_sparsity/linear_layer_sparsity": 0.9101423344963689, "compression/movement_sparsity/model_sparsity": 0.8788761453811011, "compression_loss": 104.13203430175781, "distillation_loss": 6.146230220794678, "epoch": 4.21, "learning_rate": 3.216398985629755e-05, "loss": 108.974, "step": 4981, "task_loss": 2.807964563369751 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9817603726418407, "compression/movement_sparsity/importance_threshold": -0.00012774551322470608, "compression/movement_sparsity/linear_layer_sparsity": 0.910203636642185, "compression/movement_sparsity/model_sparsity": 0.8789353416096208, "compression_loss": 104.13766479492188, "distillation_loss": 5.033144950866699, "epoch": 4.21, "learning_rate": 3.215929369775524e-05, "loss": 108.0734, "step": 4982, "task_loss": 3.7367429733276367 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9818188329413805, "compression/movement_sparsity/importance_threshold": -0.00012733607278925512, "compression/movement_sparsity/linear_layer_sparsity": 0.9101836875097301, "compression/movement_sparsity/model_sparsity": 0.8789160777912366, "compression_loss": 104.1432876586914, "distillation_loss": 6.25330924987793, "epoch": 4.21, "learning_rate": 3.2154597539212925e-05, "loss": 109.0978, "step": 4983, "task_loss": 3.21545147895813 }, { "compression/movement_sparsity/importance_regularization_factor": 0.981877168192199, "compression/movement_sparsity/importance_threshold": -0.0001269275081618882, "compression/movement_sparsity/linear_layer_sparsity": 0.9102833973995013, "compression/movement_sparsity/model_sparsity": 0.8790123623395506, "compression_loss": 104.1488037109375, "distillation_loss": 3.7482471466064453, "epoch": 4.21, "learning_rate": 3.214990138067061e-05, "loss": 108.0268, "step": 4984, "task_loss": 2.922497510910034 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9819353785281812, "compression/movement_sparsity/importance_threshold": -0.00012651981840490923, "compression/movement_sparsity/linear_layer_sparsity": 0.9103520567567485, "compression/movement_sparsity/model_sparsity": 0.8790786630366555, "compression_loss": 104.15441131591797, "distillation_loss": 4.08882999420166, "epoch": 4.21, "learning_rate": 3.21452052221283e-05, "loss": 108.7037, "step": 4985, "task_loss": 3.259140729904175 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9819934640832124, "compression/movement_sparsity/importance_threshold": -0.0001261130025806221, "compression/movement_sparsity/linear_layer_sparsity": 0.9103514128516962, "compression/movement_sparsity/model_sparsity": 0.8790780412517226, "compression_loss": 104.15991973876953, "distillation_loss": 4.379223346710205, "epoch": 4.21, "learning_rate": 3.214050906358599e-05, "loss": 108.5853, "step": 4986, "task_loss": 2.260141372680664 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9820514249911777, "compression/movement_sparsity/importance_threshold": -0.0001257070597513307, "compression/movement_sparsity/linear_layer_sparsity": 0.910390846074068, "compression/movement_sparsity/model_sparsity": 0.8791161198215952, "compression_loss": 104.16545867919922, "distillation_loss": 4.392611503601074, "epoch": 4.22, "learning_rate": 3.213581290504368e-05, "loss": 108.2725, "step": 4987, "task_loss": 2.0115554332733154 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9821092613859622, "compression/movement_sparsity/importance_threshold": -0.00012530198897933807, "compression/movement_sparsity/linear_layer_sparsity": 0.9104344169826094, "compression/movement_sparsity/model_sparsity": 0.8791581939353884, "compression_loss": 104.17098999023438, "distillation_loss": 5.762123107910156, "epoch": 4.22, "learning_rate": 3.213111674650136e-05, "loss": 108.8023, "step": 4988, "task_loss": 4.737924098968506 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9821669734014511, "compression/movement_sparsity/importance_threshold": -0.00012489778932694984, "compression/movement_sparsity/linear_layer_sparsity": 0.9104602089572058, "compression/movement_sparsity/model_sparsity": 0.879183099876312, "compression_loss": 104.17650604248047, "distillation_loss": 3.920383930206299, "epoch": 4.22, "learning_rate": 3.212642058795905e-05, "loss": 108.088, "step": 4989, "task_loss": 2.5375595092773438 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9822245611715296, "compression/movement_sparsity/importance_threshold": -0.00012449445985646904, "compression/movement_sparsity/linear_layer_sparsity": 0.9104644539608842, "compression/movement_sparsity/model_sparsity": 0.8791871990510549, "compression_loss": 104.18199920654297, "distillation_loss": 3.7607803344726562, "epoch": 4.22, "learning_rate": 3.2121724429416736e-05, "loss": 108.3436, "step": 4990, "task_loss": 3.2512242794036865 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9822820248300825, "compression/movement_sparsity/importance_threshold": -0.00012409199963020045, "compression/movement_sparsity/linear_layer_sparsity": 0.9104931315840484, "compression/movement_sparsity/model_sparsity": 0.8792148915096407, "compression_loss": 104.1875, "distillation_loss": 4.936339378356934, "epoch": 4.22, "learning_rate": 3.211702827087443e-05, "loss": 108.5708, "step": 4991, "task_loss": 1.9474214315414429 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9823393645109953, "compression/movement_sparsity/importance_threshold": -0.00012369040771044709, "compression/movement_sparsity/linear_layer_sparsity": 0.9104792518529202, "compression/movement_sparsity/model_sparsity": 0.8792014885899759, "compression_loss": 104.19292449951172, "distillation_loss": 5.581599235534668, "epoch": 4.22, "learning_rate": 3.2112332112332115e-05, "loss": 108.1149, "step": 4992, "task_loss": 3.2073309421539307 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9823965803481531, "compression/movement_sparsity/importance_threshold": -0.00012328968315951285, "compression/movement_sparsity/linear_layer_sparsity": 0.9104965895926628, "compression/movement_sparsity/model_sparsity": 0.8792182307250211, "compression_loss": 104.19840240478516, "distillation_loss": 6.594862937927246, "epoch": 4.22, "learning_rate": 3.21076359537898e-05, "loss": 108.6041, "step": 4993, "task_loss": 3.327862501144409 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9824536724754409, "compression/movement_sparsity/importance_threshold": -0.00012288982503970338, "compression/movement_sparsity/linear_layer_sparsity": 0.9104894231679137, "compression/movement_sparsity/model_sparsity": 0.8792113104890086, "compression_loss": 104.20378875732422, "distillation_loss": 4.68754243850708, "epoch": 4.22, "learning_rate": 3.210293979524749e-05, "loss": 108.781, "step": 4994, "task_loss": 2.786020278930664 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9825106410267439, "compression/movement_sparsity/importance_threshold": -0.00012249083241332084, "compression/movement_sparsity/linear_layer_sparsity": 0.9104910448547121, "compression/movement_sparsity/model_sparsity": 0.8792128764658766, "compression_loss": 104.20924377441406, "distillation_loss": 4.950472354888916, "epoch": 4.22, "learning_rate": 3.2098243636705174e-05, "loss": 108.1564, "step": 4995, "task_loss": 2.900503158569336 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9825674861359474, "compression/movement_sparsity/importance_threshold": -0.00012209270434266912, "compression/movement_sparsity/linear_layer_sparsity": 0.9104882903719882, "compression/movement_sparsity/model_sparsity": 0.8792102166081082, "compression_loss": 104.2146987915039, "distillation_loss": 3.38508677482605, "epoch": 4.22, "learning_rate": 3.209354747816287e-05, "loss": 108.2756, "step": 4996, "task_loss": 1.3145182132720947 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9826242079369362, "compression/movement_sparsity/importance_threshold": -0.00012169543989005386, "compression/movement_sparsity/linear_layer_sparsity": 0.9105573074542647, "compression/movement_sparsity/model_sparsity": 0.8792768627412869, "compression_loss": 104.2201156616211, "distillation_loss": 3.99588942527771, "epoch": 4.22, "learning_rate": 3.208885131962055e-05, "loss": 108.4481, "step": 4997, "task_loss": 2.871309280395508 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9826808065635957, "compression/movement_sparsity/importance_threshold": -0.00012129903811777722, "compression/movement_sparsity/linear_layer_sparsity": 0.9106197304718383, "compression/movement_sparsity/model_sparsity": 0.8793371413361714, "compression_loss": 104.2254638671875, "distillation_loss": 6.406543731689453, "epoch": 4.22, "learning_rate": 3.208415516107824e-05, "loss": 109.0371, "step": 4998, "task_loss": 2.234088897705078 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9827372821498109, "compression/movement_sparsity/importance_threshold": -0.00012090349808814484, "compression/movement_sparsity/linear_layer_sparsity": 0.9105647362107018, "compression/movement_sparsity/model_sparsity": 0.879284036297087, "compression_loss": 104.23081970214844, "distillation_loss": 4.646080493927002, "epoch": 4.23, "learning_rate": 3.2079459002535926e-05, "loss": 108.6644, "step": 4999, "task_loss": 2.531714677810669 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9827936348294671, "compression/movement_sparsity/importance_threshold": -0.00012050881886345975, "compression/movement_sparsity/linear_layer_sparsity": 0.9105828967180112, "compression/movement_sparsity/model_sparsity": 0.879301572935102, "compression_loss": 104.23619079589844, "distillation_loss": 3.7410311698913574, "epoch": 4.23, "learning_rate": 3.207476284399362e-05, "loss": 107.7993, "step": 5000, "task_loss": 2.567991256713867 }, { "epoch": 4.23, "eval_accuracy": 0.5636039603960395, "eval_loss": 108.08809661865234, "eval_runtime": 225.6217, "eval_samples_per_second": 111.913, "eval_steps_per_second": 0.878, "step": 5000 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9828498647364494, "compression/movement_sparsity/importance_threshold": -0.00012011499950602585, "compression/movement_sparsity/linear_layer_sparsity": 0.9106460948064813, "compression/movement_sparsity/model_sparsity": 0.8793625999748131, "compression_loss": 104.24150848388672, "distillation_loss": 3.8410282135009766, "epoch": 4.23, "learning_rate": 3.20700666854513e-05, "loss": 108.6169, "step": 5001, "task_loss": 1.5692440271377563 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9829059720046428, "compression/movement_sparsity/importance_threshold": -0.00011972203907814703, "compression/movement_sparsity/linear_layer_sparsity": 0.9107031519486188, "compression/movement_sparsity/model_sparsity": 0.87941769702859, "compression_loss": 104.24684143066406, "distillation_loss": 5.2896728515625, "epoch": 4.23, "learning_rate": 3.2065370526908985e-05, "loss": 108.7589, "step": 5002, "task_loss": 3.049318552017212 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9829619567679325, "compression/movement_sparsity/importance_threshold": -0.00011932993664212892, "compression/movement_sparsity/linear_layer_sparsity": 0.9107144322112024, "compression/movement_sparsity/model_sparsity": 0.8794285897794516, "compression_loss": 104.25211334228516, "distillation_loss": 5.057112693786621, "epoch": 4.23, "learning_rate": 3.206067436836668e-05, "loss": 108.7067, "step": 5003, "task_loss": 2.031747817993164 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9830178191602038, "compression/movement_sparsity/importance_threshold": -0.00011893869126027284, "compression/movement_sparsity/linear_layer_sparsity": 0.9107942048926863, "compression/movement_sparsity/model_sparsity": 0.8795056220239171, "compression_loss": 104.25740814208984, "distillation_loss": 3.154707431793213, "epoch": 4.23, "learning_rate": 3.2055978209824365e-05, "loss": 108.4804, "step": 5004, "task_loss": 2.0009868144989014 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9830735593153417, "compression/movement_sparsity/importance_threshold": -0.00011854830199488353, "compression/movement_sparsity/linear_layer_sparsity": 0.9108988036911881, "compression/movement_sparsity/model_sparsity": 0.8796066275319068, "compression_loss": 104.26274871826172, "distillation_loss": 5.2205810546875, "epoch": 4.23, "learning_rate": 3.205128205128206e-05, "loss": 109.1014, "step": 5005, "task_loss": 2.3252501487731934 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9831291773672313, "compression/movement_sparsity/importance_threshold": -0.00011815876790826664, "compression/movement_sparsity/linear_layer_sparsity": 0.9109258238550509, "compression/movement_sparsity/model_sparsity": 0.8796327194700172, "compression_loss": 104.2680435180664, "distillation_loss": 4.400875091552734, "epoch": 4.23, "learning_rate": 3.204658589273974e-05, "loss": 109.0912, "step": 5006, "task_loss": 3.1280903816223145 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9831846734497577, "compression/movement_sparsity/importance_threshold": -0.00011777008806272606, "compression/movement_sparsity/linear_layer_sparsity": 0.910920159875424, "compression/movement_sparsity/model_sparsity": 0.8796272500655148, "compression_loss": 104.27330017089844, "distillation_loss": 3.95890474319458, "epoch": 4.23, "learning_rate": 3.204188973419743e-05, "loss": 108.1134, "step": 5007, "task_loss": 1.7033382654190063 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9832400476968063, "compression/movement_sparsity/importance_threshold": -0.0001173822615205631, "compression/movement_sparsity/linear_layer_sparsity": 0.9109585556952114, "compression/movement_sparsity/model_sparsity": 0.8796643268707732, "compression_loss": 104.27851867675781, "distillation_loss": 3.591562032699585, "epoch": 4.23, "learning_rate": 3.203719357565512e-05, "loss": 108.8108, "step": 5008, "task_loss": 1.7163728475570679 }, { "compression/movement_sparsity/importance_regularization_factor": 0.983295300242262, "compression/movement_sparsity/importance_threshold": -0.00011699528734408424, "compression/movement_sparsity/linear_layer_sparsity": 0.9109861601432885, "compression/movement_sparsity/model_sparsity": 0.8796909830211376, "compression_loss": 104.2837142944336, "distillation_loss": 5.372992992401123, "epoch": 4.23, "learning_rate": 3.2032497417112803e-05, "loss": 108.8589, "step": 5009, "task_loss": 2.20035457611084 }, { "compression/movement_sparsity/importance_regularization_factor": 0.98335043122001, "compression/movement_sparsity/importance_threshold": -0.00011660916459559252, "compression/movement_sparsity/linear_layer_sparsity": 0.9109619660071553, "compression/movement_sparsity/model_sparsity": 0.8796676200280105, "compression_loss": 104.28894805908203, "distillation_loss": 4.983962059020996, "epoch": 4.23, "learning_rate": 3.202780125857049e-05, "loss": 108.7299, "step": 5010, "task_loss": 2.9711713790893555 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9834054407639355, "compression/movement_sparsity/importance_threshold": -0.00011622389233739098, "compression/movement_sparsity/linear_layer_sparsity": 0.9110587902483585, "compression/movement_sparsity/model_sparsity": 0.8797611180586622, "compression_loss": 104.2941665649414, "distillation_loss": 3.4205827713012695, "epoch": 4.24, "learning_rate": 3.2023105100028176e-05, "loss": 107.8804, "step": 5011, "task_loss": 2.3496716022491455 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9834603290079236, "compression/movement_sparsity/importance_threshold": -0.00011583946963178524, "compression/movement_sparsity/linear_layer_sparsity": 0.9110237451196767, "compression/movement_sparsity/model_sparsity": 0.879727276837962, "compression_loss": 104.29930877685547, "distillation_loss": 4.932743072509766, "epoch": 4.24, "learning_rate": 3.201840894148587e-05, "loss": 108.7288, "step": 5012, "task_loss": 1.6641490459442139 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9835150960858594, "compression/movement_sparsity/importance_threshold": -0.00011545589554107921, "compression/movement_sparsity/linear_layer_sparsity": 0.9110707144159943, "compression/movement_sparsity/model_sparsity": 0.8797726325944567, "compression_loss": 104.30450439453125, "distillation_loss": 5.274240016937256, "epoch": 4.24, "learning_rate": 3.2013712782943556e-05, "loss": 109.0163, "step": 5013, "task_loss": 2.080785036087036 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9835697421316282, "compression/movement_sparsity/importance_threshold": -0.00011507316912757505, "compression/movement_sparsity/linear_layer_sparsity": 0.9111329227985506, "compression/movement_sparsity/model_sparsity": 0.8798327039276969, "compression_loss": 104.30966186523438, "distillation_loss": 4.168460369110107, "epoch": 4.24, "learning_rate": 3.200901662440124e-05, "loss": 108.1448, "step": 5014, "task_loss": 2.5841617584228516 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9836242672791149, "compression/movement_sparsity/importance_threshold": -0.00011469128945357926, "compression/movement_sparsity/linear_layer_sparsity": 0.9111332089785739, "compression/movement_sparsity/model_sparsity": 0.8798329802765559, "compression_loss": 104.31482696533203, "distillation_loss": 4.5313825607299805, "epoch": 4.24, "learning_rate": 3.200432046585893e-05, "loss": 108.536, "step": 5015, "task_loss": 2.3273134231567383 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9836786716622047, "compression/movement_sparsity/importance_threshold": -0.00011431025558139487, "compression/movement_sparsity/linear_layer_sparsity": 0.9110947654621159, "compression/movement_sparsity/model_sparsity": 0.8797958574131542, "compression_loss": 104.31990814208984, "distillation_loss": 4.947587490081787, "epoch": 4.24, "learning_rate": 3.1999624307316615e-05, "loss": 108.8537, "step": 5016, "task_loss": 2.730464458465576 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9837329554147829, "compression/movement_sparsity/importance_threshold": -0.00011393006657332491, "compression/movement_sparsity/linear_layer_sparsity": 0.9111604437774542, "compression/movement_sparsity/model_sparsity": 0.8798592794763106, "compression_loss": 104.32501220703125, "distillation_loss": 5.169883728027344, "epoch": 4.24, "learning_rate": 3.199492814877431e-05, "loss": 108.8401, "step": 5017, "task_loss": 2.4348621368408203 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9837871186707345, "compression/movement_sparsity/importance_threshold": -0.00011355072149167415, "compression/movement_sparsity/linear_layer_sparsity": 0.9112713266123001, "compression/movement_sparsity/model_sparsity": 0.8799663531446641, "compression_loss": 104.33008575439453, "distillation_loss": 4.420599937438965, "epoch": 4.24, "learning_rate": 3.199023199023199e-05, "loss": 109.0665, "step": 5018, "task_loss": 3.029005289077759 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9838411615639447, "compression/movement_sparsity/importance_threshold": -0.00011317221939874649, "compression/movement_sparsity/linear_layer_sparsity": 0.9112816410173051, "compression/movement_sparsity/model_sparsity": 0.8799763132181263, "compression_loss": 104.33518981933594, "distillation_loss": 4.387458801269531, "epoch": 4.24, "learning_rate": 3.198553583168968e-05, "loss": 109.2601, "step": 5019, "task_loss": 3.3819122314453125 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9838950842282987, "compression/movement_sparsity/importance_threshold": -0.00011279455935684496, "compression/movement_sparsity/linear_layer_sparsity": 0.9113776782634444, "compression/movement_sparsity/model_sparsity": 0.8800690512894156, "compression_loss": 104.34021759033203, "distillation_loss": 5.056623935699463, "epoch": 4.24, "learning_rate": 3.198083967314737e-05, "loss": 108.4819, "step": 5020, "task_loss": 2.787040948867798 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9839488867976816, "compression/movement_sparsity/importance_threshold": -0.00011241774042827519, "compression/movement_sparsity/linear_layer_sparsity": 0.9113959937849331, "compression/movement_sparsity/model_sparsity": 0.880086737616396, "compression_loss": 104.34530639648438, "distillation_loss": 5.043505668640137, "epoch": 4.24, "learning_rate": 3.197614351460505e-05, "loss": 108.7554, "step": 5021, "task_loss": 2.578838586807251 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9840025694059783, "compression/movement_sparsity/importance_threshold": -0.00011204176167534108, "compression/movement_sparsity/linear_layer_sparsity": 0.9113515881846571, "compression/movement_sparsity/model_sparsity": 0.8800438574850971, "compression_loss": 104.35037994384766, "distillation_loss": 3.8916447162628174, "epoch": 4.24, "learning_rate": 3.1971447356062746e-05, "loss": 108.6567, "step": 5022, "task_loss": 2.1005728244781494 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9840561321870743, "compression/movement_sparsity/importance_threshold": -0.00011166662216034479, "compression/movement_sparsity/linear_layer_sparsity": 0.9113290038111548, "compression/movement_sparsity/model_sparsity": 0.8800220489543022, "compression_loss": 104.35537719726562, "distillation_loss": 4.1908464431762695, "epoch": 4.25, "learning_rate": 3.1966751197520426e-05, "loss": 108.3442, "step": 5023, "task_loss": 1.8488974571228027 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9841095752748545, "compression/movement_sparsity/importance_threshold": -0.00011129232094559197, "compression/movement_sparsity/linear_layer_sparsity": 0.911314325160795, "compression/movement_sparsity/model_sparsity": 0.8800078745607391, "compression_loss": 104.3603515625, "distillation_loss": 4.80531120300293, "epoch": 4.25, "learning_rate": 3.196205503897812e-05, "loss": 108.5211, "step": 5024, "task_loss": 2.7543392181396484 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9841628988032042, "compression/movement_sparsity/importance_threshold": -0.00011091885709338563, "compression/movement_sparsity/linear_layer_sparsity": 0.9113686755168793, "compression/movement_sparsity/model_sparsity": 0.8800603578148907, "compression_loss": 104.3653335571289, "distillation_loss": 5.396255970001221, "epoch": 4.25, "learning_rate": 3.1957358880435805e-05, "loss": 108.6167, "step": 5025, "task_loss": 3.078479528427124 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9842161029060085, "compression/movement_sparsity/importance_threshold": -0.00011054622966603055, "compression/movement_sparsity/linear_layer_sparsity": 0.9113932154538739, "compression/movement_sparsity/model_sparsity": 0.8800840547295559, "compression_loss": 104.37031555175781, "distillation_loss": 4.5727858543396, "epoch": 4.25, "learning_rate": 3.195266272189349e-05, "loss": 108.5016, "step": 5026, "task_loss": 2.9454221725463867 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9842691877171524, "compression/movement_sparsity/importance_threshold": -0.00011017443772582975, "compression/movement_sparsity/linear_layer_sparsity": 0.9114416991194813, "compression/movement_sparsity/model_sparsity": 0.8801308728320965, "compression_loss": 104.37532806396484, "distillation_loss": 3.7028183937072754, "epoch": 4.25, "learning_rate": 3.194796656335118e-05, "loss": 107.6632, "step": 5027, "task_loss": 2.0604896545410156 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9843221533705212, "compression/movement_sparsity/importance_threshold": -0.00010980348033508888, "compression/movement_sparsity/linear_layer_sparsity": 0.9114545056755222, "compression/movement_sparsity/model_sparsity": 0.8801432394435398, "compression_loss": 104.3802490234375, "distillation_loss": 6.112961769104004, "epoch": 4.25, "learning_rate": 3.1943270404808864e-05, "loss": 108.7897, "step": 5028, "task_loss": 3.3483402729034424 }, { "compression/movement_sparsity/importance_regularization_factor": 0.984375, "compression/movement_sparsity/importance_threshold": -0.00010943335655611008, "compression/movement_sparsity/linear_layer_sparsity": 0.9113830322147128, "compression/movement_sparsity/model_sparsity": 0.8800742213159873, "compression_loss": 104.38521575927734, "distillation_loss": 5.418972969055176, "epoch": 4.25, "learning_rate": 3.193857424626656e-05, "loss": 108.7136, "step": 5029, "task_loss": 2.8751347064971924 }, { "compression/movement_sparsity/importance_regularization_factor": 0.984427727739474, "compression/movement_sparsity/importance_threshold": -0.00010906406545119814, "compression/movement_sparsity/linear_layer_sparsity": 0.9114651062605506, "compression/movement_sparsity/model_sparsity": 0.8801534758658611, "compression_loss": 104.39014434814453, "distillation_loss": 3.4208569526672363, "epoch": 4.25, "learning_rate": 3.1933878087724244e-05, "loss": 108.6336, "step": 5030, "task_loss": 1.2710500955581665 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9844803367228281, "compression/movement_sparsity/importance_threshold": -0.00010869560608265782, "compression/movement_sparsity/linear_layer_sparsity": 0.9115104300217344, "compression/movement_sparsity/model_sparsity": 0.8801972426164162, "compression_loss": 104.3951187133789, "distillation_loss": 4.023708343505859, "epoch": 4.25, "learning_rate": 3.192918192918193e-05, "loss": 108.7203, "step": 5031, "task_loss": 2.1587107181549072 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9845328270839477, "compression/movement_sparsity/importance_threshold": -0.0001083279775127904, "compression/movement_sparsity/linear_layer_sparsity": 0.9114767561723307, "compression/movement_sparsity/model_sparsity": 0.8801647255673324, "compression_loss": 104.40001678466797, "distillation_loss": 5.2169647216796875, "epoch": 4.25, "learning_rate": 3.1924485770639616e-05, "loss": 109.0389, "step": 5032, "task_loss": 2.93900465965271 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9845851989567178, "compression/movement_sparsity/importance_threshold": -0.00010796117880390414, "compression/movement_sparsity/linear_layer_sparsity": 0.9115485873581691, "compression/movement_sparsity/model_sparsity": 0.8802340891309587, "compression_loss": 104.40501403808594, "distillation_loss": 3.410867691040039, "epoch": 4.25, "learning_rate": 3.191978961209731e-05, "loss": 108.1407, "step": 5033, "task_loss": 2.0547780990600586 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9846374524750237, "compression/movement_sparsity/importance_threshold": -0.00010759520901829946, "compression/movement_sparsity/linear_layer_sparsity": 0.9115758340812171, "compression/movement_sparsity/model_sparsity": 0.8802603998452493, "compression_loss": 104.4099349975586, "distillation_loss": 4.657121658325195, "epoch": 4.26, "learning_rate": 3.1915093453554996e-05, "loss": 109.0712, "step": 5034, "task_loss": 2.269298791885376 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9846895877727504, "compression/movement_sparsity/importance_threshold": -0.00010723006721828025, "compression/movement_sparsity/linear_layer_sparsity": 0.9115134945328168, "compression/movement_sparsity/model_sparsity": 0.8802002018521153, "compression_loss": 104.41484069824219, "distillation_loss": 3.967438220977783, "epoch": 4.26, "learning_rate": 3.1910397295012675e-05, "loss": 107.588, "step": 5035, "task_loss": 1.4696491956710815 }, { "compression/movement_sparsity/importance_regularization_factor": 0.984741604983783, "compression/movement_sparsity/importance_threshold": -0.00010686575246615301, "compression/movement_sparsity/linear_layer_sparsity": 0.9114568785848818, "compression/movement_sparsity/model_sparsity": 0.8801455308361629, "compression_loss": 104.41972351074219, "distillation_loss": 4.29909086227417, "epoch": 4.26, "learning_rate": 3.190570113647037e-05, "loss": 109.0746, "step": 5036, "task_loss": 2.5707712173461914 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9847935042420068, "compression/movement_sparsity/importance_threshold": -0.00010650226382421992, "compression/movement_sparsity/linear_layer_sparsity": 0.9115578882089251, "compression/movement_sparsity/model_sparsity": 0.8802430704688785, "compression_loss": 104.42462921142578, "distillation_loss": 4.294763565063477, "epoch": 4.26, "learning_rate": 3.1901004977928055e-05, "loss": 108.8264, "step": 5037, "task_loss": 2.1533334255218506 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9848452856813068, "compression/movement_sparsity/importance_threshold": -0.0001061396003547866, "compression/movement_sparsity/linear_layer_sparsity": 0.9115662232021027, "compression/movement_sparsity/model_sparsity": 0.8802511191293988, "compression_loss": 104.42950439453125, "distillation_loss": 4.899670600891113, "epoch": 4.26, "learning_rate": 3.189630881938575e-05, "loss": 109.0155, "step": 5038, "task_loss": 2.5415704250335693 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9848969494355683, "compression/movement_sparsity/importance_threshold": -0.00010577776112015436, "compression/movement_sparsity/linear_layer_sparsity": 0.9115838471218685, "compression/movement_sparsity/model_sparsity": 0.8802681376133031, "compression_loss": 104.43428802490234, "distillation_loss": 5.118988990783691, "epoch": 4.26, "learning_rate": 3.1891612660843434e-05, "loss": 108.9186, "step": 5039, "task_loss": 2.8862574100494385 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9849484956386761, "compression/movement_sparsity/importance_threshold": -0.00010541674518263055, "compression/movement_sparsity/linear_layer_sparsity": 0.9116718594031887, "compression/movement_sparsity/model_sparsity": 0.8803531264020027, "compression_loss": 104.4391098022461, "distillation_loss": 4.8668107986450195, "epoch": 4.26, "learning_rate": 3.188691650230112e-05, "loss": 109.0847, "step": 5040, "task_loss": 1.5400503873825073 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9849999244245158, "compression/movement_sparsity/importance_threshold": -0.00010505655160451561, "compression/movement_sparsity/linear_layer_sparsity": 0.9117413176796676, "compression/movement_sparsity/model_sparsity": 0.8804201985730059, "compression_loss": 104.4439697265625, "distillation_loss": 4.153745651245117, "epoch": 4.26, "learning_rate": 3.188222034375881e-05, "loss": 108.4479, "step": 5041, "task_loss": 1.8383405208587646 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9850512359269723, "compression/movement_sparsity/importance_threshold": -0.00010469717944811604, "compression/movement_sparsity/linear_layer_sparsity": 0.9117762793391759, "compression/movement_sparsity/model_sparsity": 0.8804539591919555, "compression_loss": 104.44877624511719, "distillation_loss": 4.911004066467285, "epoch": 4.26, "learning_rate": 3.187752418521649e-05, "loss": 108.5196, "step": 5042, "task_loss": 2.4689230918884277 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9851024302799307, "compression/movement_sparsity/importance_threshold": -0.00010433862777573487, "compression/movement_sparsity/linear_layer_sparsity": 0.9117506304545913, "compression/movement_sparsity/model_sparsity": 0.8804291914254614, "compression_loss": 104.45355224609375, "distillation_loss": 2.248910427093506, "epoch": 4.26, "learning_rate": 3.1872828026674186e-05, "loss": 108.1036, "step": 5043, "task_loss": 0.9459171295166016 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9851535076172762, "compression/movement_sparsity/importance_threshold": -0.000103980895649676, "compression/movement_sparsity/linear_layer_sparsity": 0.9117966338933304, "compression/movement_sparsity/model_sparsity": 0.8804736145045567, "compression_loss": 104.4583511352539, "distillation_loss": 5.878549575805664, "epoch": 4.26, "learning_rate": 3.1868131868131866e-05, "loss": 108.4979, "step": 5044, "task_loss": 2.47756028175354 }, { "compression/movement_sparsity/importance_regularization_factor": 0.985204468072894, "compression/movement_sparsity/importance_threshold": -0.00010362398213224332, "compression/movement_sparsity/linear_layer_sparsity": 0.9118314643869948, "compression/movement_sparsity/model_sparsity": 0.8805072484636126, "compression_loss": 104.46318054199219, "distillation_loss": 5.743114471435547, "epoch": 4.26, "learning_rate": 3.186343570958956e-05, "loss": 108.7096, "step": 5045, "task_loss": 3.053860902786255 }, { "compression/movement_sparsity/importance_regularization_factor": 0.985255311780669, "compression/movement_sparsity/importance_threshold": -0.00010326788628574248, "compression/movement_sparsity/linear_layer_sparsity": 0.9118860413022641, "compression/movement_sparsity/model_sparsity": 0.8805599504939442, "compression_loss": 104.46797943115234, "distillation_loss": 4.399833679199219, "epoch": 4.27, "learning_rate": 3.1858739551047245e-05, "loss": 108.348, "step": 5046, "task_loss": 2.8133544921875 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9853060388744866, "compression/movement_sparsity/importance_threshold": -0.00010291260717247477, "compression/movement_sparsity/linear_layer_sparsity": 0.9119109151159526, "compression/movement_sparsity/model_sparsity": 0.8805839698156116, "compression_loss": 104.47276306152344, "distillation_loss": 3.8360495567321777, "epoch": 4.27, "learning_rate": 3.185404339250493e-05, "loss": 108.1954, "step": 5047, "task_loss": 2.0244178771972656 }, { "compression/movement_sparsity/importance_regularization_factor": 0.985356649488232, "compression/movement_sparsity/importance_threshold": -0.00010255814385474581, "compression/movement_sparsity/linear_layer_sparsity": 0.9119037129187004, "compression/movement_sparsity/model_sparsity": 0.8805770150359917, "compression_loss": 104.47752380371094, "distillation_loss": 4.759662628173828, "epoch": 4.27, "learning_rate": 3.184934723396262e-05, "loss": 108.6321, "step": 5048, "task_loss": 3.2276015281677246 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9854071437557902, "compression/movement_sparsity/importance_threshold": -0.00010220449539485865, "compression/movement_sparsity/linear_layer_sparsity": 0.9120151919619282, "compression/movement_sparsity/model_sparsity": 0.8806846644311349, "compression_loss": 104.48231506347656, "distillation_loss": 4.029386520385742, "epoch": 4.27, "learning_rate": 3.1844651075420304e-05, "loss": 108.9369, "step": 5049, "task_loss": 1.7876924276351929 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9854575218110462, "compression/movement_sparsity/importance_threshold": -0.00010185166085511892, "compression/movement_sparsity/linear_layer_sparsity": 0.9121171078227118, "compression/movement_sparsity/model_sparsity": 0.8807830791685708, "compression_loss": 104.48702239990234, "distillation_loss": 5.76923942565918, "epoch": 4.27, "learning_rate": 3.1839954916878e-05, "loss": 108.7576, "step": 5050, "task_loss": 2.215848207473755 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9855077837878853, "compression/movement_sparsity/importance_threshold": -0.00010149963929782964, "compression/movement_sparsity/linear_layer_sparsity": 0.912057963951238, "compression/movement_sparsity/model_sparsity": 0.8807259670710299, "compression_loss": 104.4917984008789, "distillation_loss": 4.154844760894775, "epoch": 4.27, "learning_rate": 3.1835258758335684e-05, "loss": 108.6595, "step": 5051, "task_loss": 2.0954430103302 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9855579298201926, "compression/movement_sparsity/importance_threshold": -0.00010114842978529472, "compression/movement_sparsity/linear_layer_sparsity": 0.9120699835122149, "compression/movement_sparsity/model_sparsity": 0.8807375737231108, "compression_loss": 104.49649810791016, "distillation_loss": 4.78985595703125, "epoch": 4.27, "learning_rate": 3.183056259979337e-05, "loss": 108.5691, "step": 5052, "task_loss": 2.24904465675354 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9856079600418534, "compression/movement_sparsity/importance_threshold": -0.00010079803137981718, "compression/movement_sparsity/linear_layer_sparsity": 0.9121093809620838, "compression/movement_sparsity/model_sparsity": 0.880775617749376, "compression_loss": 104.5011978149414, "distillation_loss": 5.814626693725586, "epoch": 4.27, "learning_rate": 3.1825866441251057e-05, "loss": 109.4795, "step": 5053, "task_loss": 3.0940518379211426 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9856578745867526, "compression/movement_sparsity/importance_threshold": -0.00010044844314370267, "compression/movement_sparsity/linear_layer_sparsity": 0.912155122069135, "compression/movement_sparsity/model_sparsity": 0.8808197875086838, "compression_loss": 104.50589752197266, "distillation_loss": 4.1859049797058105, "epoch": 4.27, "learning_rate": 3.182117028270874e-05, "loss": 108.9171, "step": 5054, "task_loss": 1.856653094291687 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9857076735887755, "compression/movement_sparsity/importance_threshold": -0.00010009966413925334, "compression/movement_sparsity/linear_layer_sparsity": 0.9121741411165142, "compression/movement_sparsity/model_sparsity": 0.8808381531932761, "compression_loss": 104.51061248779297, "distillation_loss": 4.932311534881592, "epoch": 4.27, "learning_rate": 3.1816474124166436e-05, "loss": 109.2564, "step": 5055, "task_loss": 2.129056930541992 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9857573571818071, "compression/movement_sparsity/importance_threshold": -9.975169342877569e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9121804132286907, "compression/movement_sparsity/model_sparsity": 0.880844209839104, "compression_loss": 104.51529693603516, "distillation_loss": 4.652114391326904, "epoch": 4.27, "learning_rate": 3.181177796562412e-05, "loss": 108.4885, "step": 5056, "task_loss": 2.521817207336426 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9858069254997327, "compression/movement_sparsity/importance_threshold": -9.94045300745719e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.912164911810764, "compression/movement_sparsity/model_sparsity": 0.8808292409425712, "compression_loss": 104.51992797851562, "distillation_loss": 4.611353874206543, "epoch": 4.27, "learning_rate": 3.180708180708181e-05, "loss": 108.9046, "step": 5057, "task_loss": 2.739943742752075 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9858563786764374, "compression/movement_sparsity/importance_threshold": -9.905817313894585e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9122902944334551, "compression/movement_sparsity/model_sparsity": 0.8809503162864507, "compression_loss": 104.52462768554688, "distillation_loss": 3.9913489818573, "epoch": 4.28, "learning_rate": 3.1802385648539495e-05, "loss": 108.7195, "step": 5058, "task_loss": 1.3560429811477661 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9859057168458063, "compression/movement_sparsity/importance_threshold": -9.871262168420231e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9123255303488191, "compression/movement_sparsity/model_sparsity": 0.8809843417397236, "compression_loss": 104.52928161621094, "distillation_loss": 4.735760688781738, "epoch": 4.28, "learning_rate": 3.179768948999718e-05, "loss": 108.7523, "step": 5059, "task_loss": 2.7767319679260254 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9859549401417245, "compression/movement_sparsity/importance_threshold": -9.836787477264519e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9123383488290276, "compression/movement_sparsity/model_sparsity": 0.8809967198657027, "compression_loss": 104.53387451171875, "distillation_loss": 3.8069205284118652, "epoch": 4.28, "learning_rate": 3.1792993331454874e-05, "loss": 108.7702, "step": 5060, "task_loss": 2.062389850616455 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9860040486980772, "compression/movement_sparsity/importance_threshold": -9.802393146657751e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9123520377734736, "compression/movement_sparsity/model_sparsity": 0.8810099385527949, "compression_loss": 104.53852844238281, "distillation_loss": 4.421704292297363, "epoch": 4.28, "learning_rate": 3.1788297172912554e-05, "loss": 108.9536, "step": 5061, "task_loss": 2.2816669940948486 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9860530426487495, "compression/movement_sparsity/importance_threshold": -9.768079082830491e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9123355466496332, "compression/movement_sparsity/model_sparsity": 0.880994013949791, "compression_loss": 104.54315185546875, "distillation_loss": 4.5552802085876465, "epoch": 4.28, "learning_rate": 3.178360101437025e-05, "loss": 109.0314, "step": 5062, "task_loss": 2.2028534412384033 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9861019221276266, "compression/movement_sparsity/importance_threshold": -9.733845192012956e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9124995039546263, "compression/movement_sparsity/model_sparsity": 0.881152338816966, "compression_loss": 104.5477294921875, "distillation_loss": 2.74337100982666, "epoch": 4.28, "learning_rate": 3.1778904855827933e-05, "loss": 108.8639, "step": 5063, "task_loss": 1.8484052419662476 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9861506872685937, "compression/movement_sparsity/importance_threshold": -9.69969138043562e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9125077435544627, "compression/movement_sparsity/model_sparsity": 0.8811602953612, "compression_loss": 104.55232238769531, "distillation_loss": 5.002686977386475, "epoch": 4.28, "learning_rate": 3.1774208697285627e-05, "loss": 109.1945, "step": 5064, "task_loss": 2.819601058959961 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9861993382055357, "compression/movement_sparsity/importance_threshold": -9.665617554328963e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9125325338989777, "compression/movement_sparsity/model_sparsity": 0.8811842340811168, "compression_loss": 104.55687713623047, "distillation_loss": 5.790381908416748, "epoch": 4.28, "learning_rate": 3.1769512538743306e-05, "loss": 108.9055, "step": 5065, "task_loss": 2.860867500305176 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9862478750723379, "compression/movement_sparsity/importance_threshold": -9.631623619923285e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9126209277536623, "compression/movement_sparsity/model_sparsity": 0.8812695913349617, "compression_loss": 104.56140899658203, "distillation_loss": 4.94313907623291, "epoch": 4.28, "learning_rate": 3.176481638020099e-05, "loss": 108.8471, "step": 5066, "task_loss": 3.8895223140716553 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9862962980028855, "compression/movement_sparsity/importance_threshold": -9.597709483448978e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9126741572379888, "compression/movement_sparsity/model_sparsity": 0.8813209922227486, "compression_loss": 104.56596374511719, "distillation_loss": 4.835390090942383, "epoch": 4.28, "learning_rate": 3.1760120221658686e-05, "loss": 108.7852, "step": 5067, "task_loss": 2.5026934146881104 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9863446071310636, "compression/movement_sparsity/importance_threshold": -9.563875051136518e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9127214961835032, "compression/movement_sparsity/model_sparsity": 0.8813667049298529, "compression_loss": 104.57048034667969, "distillation_loss": 3.897519826889038, "epoch": 4.28, "learning_rate": 3.175542406311637e-05, "loss": 108.6235, "step": 5068, "task_loss": 2.6005992889404297 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9863928025907573, "compression/movement_sparsity/importance_threshold": -9.530120229216208e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.912723272884481, "compression/movement_sparsity/model_sparsity": 0.8813684205956863, "compression_loss": 104.57502746582031, "distillation_loss": 4.4903669357299805, "epoch": 4.28, "learning_rate": 3.1750727904574065e-05, "loss": 108.8103, "step": 5069, "task_loss": 2.770446300506592 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9864408845158518, "compression/movement_sparsity/importance_threshold": -9.496444923918525e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9127726270143258, "compression/movement_sparsity/model_sparsity": 0.88141607925934, "compression_loss": 104.57952117919922, "distillation_loss": 4.977445125579834, "epoch": 4.29, "learning_rate": 3.1746031746031745e-05, "loss": 108.9538, "step": 5070, "task_loss": 3.13754940032959 }, { "compression/movement_sparsity/importance_regularization_factor": 0.986488853040232, "compression/movement_sparsity/importance_threshold": -9.462849041473858e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9128232093334371, "compression/movement_sparsity/model_sparsity": 0.8814649239201804, "compression_loss": 104.583984375, "distillation_loss": 4.58910608291626, "epoch": 4.29, "learning_rate": 3.174133558748944e-05, "loss": 108.6387, "step": 5071, "task_loss": 2.6777477264404297 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9865367082977834, "compression/movement_sparsity/importance_threshold": -9.429332488112598e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9128593872580443, "compression/movement_sparsity/model_sparsity": 0.881499859021781, "compression_loss": 104.58843994140625, "distillation_loss": 7.378966331481934, "epoch": 4.29, "learning_rate": 3.1736639428947124e-05, "loss": 109.2171, "step": 5072, "task_loss": 3.9182486534118652 }, { "compression/movement_sparsity/importance_regularization_factor": 0.986584450422391, "compression/movement_sparsity/importance_threshold": -9.395895170065047e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9128690339096618, "compression/movement_sparsity/model_sparsity": 0.8815091742812388, "compression_loss": 104.59294891357422, "distillation_loss": 4.1890869140625, "epoch": 4.29, "learning_rate": 3.173194327040481e-05, "loss": 108.7325, "step": 5073, "task_loss": 2.506502628326416 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9866320795479399, "compression/movement_sparsity/importance_threshold": -9.362536993561682e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9129192942762469, "compression/movement_sparsity/model_sparsity": 0.8815577080496128, "compression_loss": 104.597412109375, "distillation_loss": 5.296530723571777, "epoch": 4.29, "learning_rate": 3.17272471118625e-05, "loss": 108.9708, "step": 5074, "task_loss": 4.015791893005371 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9866795958083151, "compression/movement_sparsity/importance_threshold": -9.32925786483298e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9129804414078836, "compression/movement_sparsity/model_sparsity": 0.8816167545891672, "compression_loss": 104.60186004638672, "distillation_loss": 4.187299728393555, "epoch": 4.29, "learning_rate": 3.172255095332018e-05, "loss": 109.1803, "step": 5075, "task_loss": 2.220911979675293 }, { "compression/movement_sparsity/importance_regularization_factor": 0.986726999337402, "compression/movement_sparsity/importance_threshold": -9.296057690109244e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9129801790761957, "compression/movement_sparsity/model_sparsity": 0.8816165012693797, "compression_loss": 104.60629272460938, "distillation_loss": 4.564126491546631, "epoch": 4.29, "learning_rate": 3.1717854794777876e-05, "loss": 108.3006, "step": 5076, "task_loss": 1.6244584321975708 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9867742902690857, "compression/movement_sparsity/importance_threshold": -9.262936375620864e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9130673566657814, "compression/movement_sparsity/model_sparsity": 0.8817006840405736, "compression_loss": 104.6106948852539, "distillation_loss": 4.110866069793701, "epoch": 4.29, "learning_rate": 3.171315863623556e-05, "loss": 108.7214, "step": 5077, "task_loss": 1.8795019388198853 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9868214687372513, "compression/movement_sparsity/importance_threshold": -9.229893827598229e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9130596298051534, "compression/movement_sparsity/model_sparsity": 0.8816932226213787, "compression_loss": 104.61506652832031, "distillation_loss": 3.1517021656036377, "epoch": 4.29, "learning_rate": 3.170846247769325e-05, "loss": 108.7865, "step": 5078, "task_loss": 1.1313797235488892 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9868685348757839, "compression/movement_sparsity/importance_threshold": -9.196929952271816e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9131114880102017, "compression/movement_sparsity/model_sparsity": 0.8817432993375491, "compression_loss": 104.61946868896484, "distillation_loss": 4.865270614624023, "epoch": 4.29, "learning_rate": 3.1703766319150935e-05, "loss": 109.3787, "step": 5079, "task_loss": 2.8251864910125732 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9869154888185686, "compression/movement_sparsity/importance_threshold": -9.16404465587193e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.913124282642075, "compression/movement_sparsity/model_sparsity": 0.8817556544344567, "compression_loss": 104.62379455566406, "distillation_loss": 3.9382247924804688, "epoch": 4.29, "learning_rate": 3.169907016060862e-05, "loss": 108.4242, "step": 5080, "task_loss": 1.8389393091201782 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9869623306994906, "compression/movement_sparsity/importance_threshold": -9.131237844629131e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9131426935569048, "compression/movement_sparsity/model_sparsity": 0.8817734328777235, "compression_loss": 104.62818908691406, "distillation_loss": 3.9619786739349365, "epoch": 4.29, "learning_rate": 3.1694374002066315e-05, "loss": 108.7531, "step": 5081, "task_loss": 2.127014398574829 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9870090606524351, "compression/movement_sparsity/importance_threshold": -9.098509424773551e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.913176319709638, "compression/movement_sparsity/model_sparsity": 0.8818059038686641, "compression_loss": 104.6324691772461, "distillation_loss": 4.5122785568237305, "epoch": 4.3, "learning_rate": 3.1689677843524e-05, "loss": 109.0548, "step": 5082, "task_loss": 2.526580333709717 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9870556788112871, "compression/movement_sparsity/importance_threshold": -9.065859302535839e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9131126327302947, "compression/movement_sparsity/model_sparsity": 0.8817444047329854, "compression_loss": 104.63672637939453, "distillation_loss": 4.357398986816406, "epoch": 4.3, "learning_rate": 3.168498168498169e-05, "loss": 109.243, "step": 5083, "task_loss": 2.2652125358581543 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9871021853099319, "compression/movement_sparsity/importance_threshold": -9.033287384146299e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9131174143215167, "compression/movement_sparsity/model_sparsity": 0.881749022061839, "compression_loss": 104.64106750488281, "distillation_loss": 4.146395683288574, "epoch": 4.3, "learning_rate": 3.1680285526439374e-05, "loss": 108.6691, "step": 5084, "task_loss": 1.9048006534576416 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9871485802822545, "compression/movement_sparsity/importance_threshold": -9.00079357583532e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9131353482696412, "compression/movement_sparsity/model_sparsity": 0.881766339923674, "compression_loss": 104.64531707763672, "distillation_loss": 3.0356369018554688, "epoch": 4.3, "learning_rate": 3.167558936789706e-05, "loss": 108.205, "step": 5085, "task_loss": 1.0099021196365356 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9871948638621402, "compression/movement_sparsity/importance_threshold": -8.968377783833206e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.913215156723628, "compression/movement_sparsity/model_sparsity": 0.8818434067117469, "compression_loss": 104.64957427978516, "distillation_loss": 4.1330718994140625, "epoch": 4.3, "learning_rate": 3.167089320935475e-05, "loss": 108.451, "step": 5086, "task_loss": 2.4595046043395996 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9872410361834739, "compression/movement_sparsity/importance_threshold": -8.936039914370607e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9132108044024408, "compression/movement_sparsity/model_sparsity": 0.8818392039061819, "compression_loss": 104.65386962890625, "distillation_loss": 4.441989898681641, "epoch": 4.3, "learning_rate": 3.166619705081243e-05, "loss": 108.7017, "step": 5087, "task_loss": 3.139507293701172 }, { "compression/movement_sparsity/importance_regularization_factor": 0.987287097380141, "compression/movement_sparsity/importance_threshold": -8.903779873677652e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9132372522062572, "compression/movement_sparsity/model_sparsity": 0.8818647431465741, "compression_loss": 104.65805053710938, "distillation_loss": 5.655196189880371, "epoch": 4.3, "learning_rate": 3.1661500892270126e-05, "loss": 109.1403, "step": 5088, "task_loss": 3.0189309120178223 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9873330475860265, "compression/movement_sparsity/importance_threshold": -8.871597567984904e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9132284283222066, "compression/movement_sparsity/model_sparsity": 0.8818562223900862, "compression_loss": 104.66229248046875, "distillation_loss": 5.313323020935059, "epoch": 4.3, "learning_rate": 3.165680473372781e-05, "loss": 108.5996, "step": 5089, "task_loss": 2.9535226821899414 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9873788869350155, "compression/movement_sparsity/importance_threshold": -8.839492903522754e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9132072271521501, "compression/movement_sparsity/model_sparsity": 0.8818357495454435, "compression_loss": 104.66650390625, "distillation_loss": 7.070138931274414, "epoch": 4.3, "learning_rate": 3.16521085751855e-05, "loss": 109.2462, "step": 5090, "task_loss": 4.008217811584473 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9874246155609934, "compression/movement_sparsity/importance_threshold": -8.807465786521418e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.913269185127186, "compression/movement_sparsity/model_sparsity": 0.881895579073432, "compression_loss": 104.67070770263672, "distillation_loss": 4.5764055252075195, "epoch": 4.3, "learning_rate": 3.1647412416643185e-05, "loss": 108.8554, "step": 5091, "task_loss": 2.3671629428863525 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9874702335978449, "compression/movement_sparsity/importance_threshold": -8.775516123211546e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9132949651776148, "compression/movement_sparsity/model_sparsity": 0.8819204734998198, "compression_loss": 104.67491912841797, "distillation_loss": 3.8762803077697754, "epoch": 4.3, "learning_rate": 3.164271625810087e-05, "loss": 108.9501, "step": 5092, "task_loss": 1.462957501411438 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9875157411794555, "compression/movement_sparsity/importance_threshold": -8.74364381982344e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9133516765188909, "compression/movement_sparsity/model_sparsity": 0.8819752366320586, "compression_loss": 104.67914581298828, "distillation_loss": 3.62357234954834, "epoch": 4.3, "learning_rate": 3.1638020099558564e-05, "loss": 108.8851, "step": 5093, "task_loss": 2.4960060119628906 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9875611384397103, "compression/movement_sparsity/importance_threshold": -8.711848782587406e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9134001363361631, "compression/movement_sparsity/model_sparsity": 0.8820220317055276, "compression_loss": 104.68325805664062, "distillation_loss": 5.020461559295654, "epoch": 4.31, "learning_rate": 3.163332394101625e-05, "loss": 108.5681, "step": 5094, "task_loss": 2.926961898803711 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9876064255124943, "compression/movement_sparsity/importance_threshold": -8.680130917733831e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9133786370619156, "compression/movement_sparsity/model_sparsity": 0.88200127099749, "compression_loss": 104.68740844726562, "distillation_loss": 4.894475936889648, "epoch": 4.31, "learning_rate": 3.162862778247394e-05, "loss": 108.452, "step": 5095, "task_loss": 2.347371816635132 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9876516025316927, "compression/movement_sparsity/importance_threshold": -8.648490131493367e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9134246405006547, "compression/movement_sparsity/model_sparsity": 0.8820456940765854, "compression_loss": 104.69146728515625, "distillation_loss": 4.767604827880859, "epoch": 4.31, "learning_rate": 3.162393162393162e-05, "loss": 108.6651, "step": 5096, "task_loss": 3.22982120513916 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9876966696311907, "compression/movement_sparsity/importance_threshold": -8.616926330096142e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9134568119049363, "compression/movement_sparsity/model_sparsity": 0.882076760294159, "compression_loss": 104.6956558227539, "distillation_loss": 5.728018760681152, "epoch": 4.31, "learning_rate": 3.161923546538931e-05, "loss": 109.4553, "step": 5097, "task_loss": 3.0409679412841797 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9877416269448734, "compression/movement_sparsity/importance_threshold": -8.585439419772634e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9135070961198568, "compression/movement_sparsity/model_sparsity": 0.8821253170916047, "compression_loss": 104.69977569580078, "distillation_loss": 3.09999418258667, "epoch": 4.31, "learning_rate": 3.1614539306847e-05, "loss": 109.3704, "step": 5098, "task_loss": 1.7193666696548462 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9877864746066259, "compression/movement_sparsity/importance_threshold": -8.554029306753318e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9135459331338468, "compression/movement_sparsity/model_sparsity": 0.8821628199346875, "compression_loss": 104.70387268066406, "distillation_loss": 6.444982528686523, "epoch": 4.31, "learning_rate": 3.160984314830469e-05, "loss": 109.25, "step": 5099, "task_loss": 4.070639610290527 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9878312127503334, "compression/movement_sparsity/importance_threshold": -8.522695897268586e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9136412430057602, "compression/movement_sparsity/model_sparsity": 0.8822548556192933, "compression_loss": 104.70794677734375, "distillation_loss": 5.099937438964844, "epoch": 4.31, "learning_rate": 3.1605146989762375e-05, "loss": 108.571, "step": 5100, "task_loss": 2.496345043182373 }, { "compression/movement_sparsity/importance_regularization_factor": 0.987875841509881, "compression/movement_sparsity/importance_threshold": -8.49143909754874e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9137103077847072, "compression/movement_sparsity/model_sparsity": 0.8823215478106152, "compression_loss": 104.7120590209961, "distillation_loss": 5.626680850982666, "epoch": 4.31, "learning_rate": 3.160045083122006e-05, "loss": 109.3524, "step": 5101, "task_loss": 3.508288621902466 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9879203610191538, "compression/movement_sparsity/importance_threshold": -8.46025881382417e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9136771466745118, "compression/movement_sparsity/model_sparsity": 0.8822895258865706, "compression_loss": 104.7160415649414, "distillation_loss": 3.738895893096924, "epoch": 4.31, "learning_rate": 3.1595754672677755e-05, "loss": 109.3137, "step": 5102, "task_loss": 2.1533472537994385 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9879647714120371, "compression/movement_sparsity/importance_threshold": -8.429154952325352e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9136926719407737, "compression/movement_sparsity/model_sparsity": 0.8823045178121751, "compression_loss": 104.72012329101562, "distillation_loss": 3.4749934673309326, "epoch": 4.31, "learning_rate": 3.159105851413544e-05, "loss": 109.2707, "step": 5103, "task_loss": 3.074566602706909 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9880090728224158, "compression/movement_sparsity/importance_threshold": -8.398127419282676e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.913720562568874, "compression/movement_sparsity/model_sparsity": 0.8823314503113985, "compression_loss": 104.72415924072266, "distillation_loss": 4.365281581878662, "epoch": 4.31, "learning_rate": 3.158636235559313e-05, "loss": 109.637, "step": 5104, "task_loss": 2.7593789100646973 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9880532653841753, "compression/movement_sparsity/importance_threshold": -8.367176120926533e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9137589941611644, "compression/movement_sparsity/model_sparsity": 0.8823685616602643, "compression_loss": 104.72821807861328, "distillation_loss": 3.7468056678771973, "epoch": 4.32, "learning_rate": 3.1581666197050814e-05, "loss": 108.353, "step": 5105, "task_loss": 3.0007364749908447 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9880973492312005, "compression/movement_sparsity/importance_threshold": -8.336300963487312e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9137280867186522, "compression/movement_sparsity/model_sparsity": 0.8823387159834849, "compression_loss": 104.73224639892578, "distillation_loss": 4.145212173461914, "epoch": 4.32, "learning_rate": 3.15769700385085e-05, "loss": 108.5379, "step": 5106, "task_loss": 2.364569664001465 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9881413244973768, "compression/movement_sparsity/importance_threshold": -8.305501853195316e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9137238655633091, "compression/movement_sparsity/model_sparsity": 0.8823346398378136, "compression_loss": 104.7362289428711, "distillation_loss": 3.8106284141540527, "epoch": 4.32, "learning_rate": 3.1572273879966193e-05, "loss": 108.9211, "step": 5107, "task_loss": 1.8723922967910767 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9881851913165891, "compression/movement_sparsity/importance_threshold": -8.274778696281109e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9137836533398354, "compression/movement_sparsity/model_sparsity": 0.8823923737202874, "compression_loss": 104.74028015136719, "distillation_loss": 5.631810665130615, "epoch": 4.32, "learning_rate": 3.156757772142387e-05, "loss": 108.7576, "step": 5108, "task_loss": 2.161163568496704 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9882289498227227, "compression/movement_sparsity/importance_threshold": -8.24413139897508e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9138630682962902, "compression/movement_sparsity/model_sparsity": 0.8824690605286791, "compression_loss": 104.74427032470703, "distillation_loss": 6.678553581237793, "epoch": 4.32, "learning_rate": 3.1562881562881566e-05, "loss": 109.5546, "step": 5109, "task_loss": 3.2221322059631348 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9882726001496626, "compression/movement_sparsity/importance_threshold": -8.213559867507533e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9138481988592483, "compression/movement_sparsity/model_sparsity": 0.8824547019025433, "compression_loss": 104.74825286865234, "distillation_loss": 4.360442161560059, "epoch": 4.32, "learning_rate": 3.155818540433925e-05, "loss": 108.8587, "step": 5110, "task_loss": 3.30605149269104 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9883161424312941, "compression/movement_sparsity/importance_threshold": -8.183064008108771e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9139562079696939, "compression/movement_sparsity/model_sparsity": 0.8825590005677703, "compression_loss": 104.75228118896484, "distillation_loss": 4.932836532592773, "epoch": 4.32, "learning_rate": 3.155348924579694e-05, "loss": 108.5993, "step": 5111, "task_loss": 2.753314971923828 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9883595768015022, "compression/movement_sparsity/importance_threshold": -8.152643727009443e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9139614307551184, "compression/movement_sparsity/model_sparsity": 0.8825640439344482, "compression_loss": 104.75624084472656, "distillation_loss": 5.148598670959473, "epoch": 4.32, "learning_rate": 3.1548793087254625e-05, "loss": 108.8279, "step": 5112, "task_loss": 2.9694583415985107 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9884029033941721, "compression/movement_sparsity/importance_threshold": -8.122298930439767e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9139584735615447, "compression/movement_sparsity/model_sparsity": 0.8825611883295712, "compression_loss": 104.76024627685547, "distillation_loss": 3.138554573059082, "epoch": 4.32, "learning_rate": 3.154409692871231e-05, "loss": 108.4484, "step": 5113, "task_loss": 1.0104743242263794 }, { "compression/movement_sparsity/importance_regularization_factor": 0.988446122343189, "compression/movement_sparsity/importance_threshold": -8.092029524630218e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9138976722307695, "compression/movement_sparsity/model_sparsity": 0.8825024757115548, "compression_loss": 104.76422882080078, "distillation_loss": 3.8025588989257812, "epoch": 4.32, "learning_rate": 3.1539400770170005e-05, "loss": 109.316, "step": 5114, "task_loss": 1.6544033288955688 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9884892337824379, "compression/movement_sparsity/importance_threshold": -8.061835415811187e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9139661765738375, "compression/movement_sparsity/model_sparsity": 0.8825686267196945, "compression_loss": 104.7681884765625, "distillation_loss": 4.587072372436523, "epoch": 4.32, "learning_rate": 3.153470461162769e-05, "loss": 109.293, "step": 5115, "task_loss": 2.6435980796813965 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9885322378458041, "compression/movement_sparsity/importance_threshold": -8.031716510212977e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9140330115334365, "compression/movement_sparsity/model_sparsity": 0.8826331656928229, "compression_loss": 104.77214813232422, "distillation_loss": 5.269341468811035, "epoch": 4.32, "learning_rate": 3.153000845308538e-05, "loss": 110.1792, "step": 5116, "task_loss": 3.031059741973877 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9885751346671726, "compression/movement_sparsity/importance_threshold": -8.001672714066151e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9139748335195411, "compression/movement_sparsity/model_sparsity": 0.8825769862726813, "compression_loss": 104.77610778808594, "distillation_loss": 4.452831745147705, "epoch": 4.33, "learning_rate": 3.1525312294543064e-05, "loss": 109.2125, "step": 5117, "task_loss": 2.4274466037750244 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9886179243804287, "compression/movement_sparsity/importance_threshold": -7.971703933600926e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9140202169015632, "compression/movement_sparsity/model_sparsity": 0.8826208105959154, "compression_loss": 104.78009796142578, "distillation_loss": 3.2454230785369873, "epoch": 4.33, "learning_rate": 3.152061613600075e-05, "loss": 108.4691, "step": 5118, "task_loss": 1.296035885810852 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9886606071194574, "compression/movement_sparsity/importance_threshold": -7.941810075047951e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9139668920238957, "compression/movement_sparsity/model_sparsity": 0.8825693175918422, "compression_loss": 104.7839584350586, "distillation_loss": 2.4104764461517334, "epoch": 4.33, "learning_rate": 3.151591997745844e-05, "loss": 108.2582, "step": 5119, "task_loss": 1.2541050910949707 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9887031830181439, "compression/movement_sparsity/importance_threshold": -7.911991044637356e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9139534296386348, "compression/movement_sparsity/model_sparsity": 0.8825563176809301, "compression_loss": 104.78782653808594, "distillation_loss": 3.8216357231140137, "epoch": 4.33, "learning_rate": 3.151122381891613e-05, "loss": 108.2746, "step": 5120, "task_loss": 2.261786699295044 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9887456522103732, "compression/movement_sparsity/importance_threshold": -7.882246748599619e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9139611922717658, "compression/movement_sparsity/model_sparsity": 0.8825638136437324, "compression_loss": 104.79173278808594, "distillation_loss": 3.2182445526123047, "epoch": 4.33, "learning_rate": 3.1506527660373816e-05, "loss": 108.3626, "step": 5121, "task_loss": 1.618814468383789 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9887880148300308, "compression/movement_sparsity/importance_threshold": -7.852577093165215e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9139735099369336, "compression/movement_sparsity/model_sparsity": 0.8825757081592082, "compression_loss": 104.79562377929688, "distillation_loss": 4.392571926116943, "epoch": 4.33, "learning_rate": 3.15018315018315e-05, "loss": 109.0453, "step": 5122, "task_loss": 3.160836935043335 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9888302710110014, "compression/movement_sparsity/importance_threshold": -7.822981984564534e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9139826557735102, "compression/movement_sparsity/model_sparsity": 0.8825845398081625, "compression_loss": 104.79943084716797, "distillation_loss": 3.1500296592712402, "epoch": 4.33, "learning_rate": 3.149713534328919e-05, "loss": 108.8964, "step": 5123, "task_loss": 2.643526077270508 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9888724208871704, "compression/movement_sparsity/importance_threshold": -7.79346132902788e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9140068618338111, "compression/movement_sparsity/model_sparsity": 0.8826079143158254, "compression_loss": 104.80328369140625, "distillation_loss": 3.7356560230255127, "epoch": 4.33, "learning_rate": 3.149243918474688e-05, "loss": 109.3757, "step": 5124, "task_loss": 2.597242832183838 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9889144645924229, "compression/movement_sparsity/importance_threshold": -7.764015032785728e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9139743327045005, "compression/movement_sparsity/model_sparsity": 0.8825765026621779, "compression_loss": 104.80711364746094, "distillation_loss": 3.989664077758789, "epoch": 4.33, "learning_rate": 3.148774302620456e-05, "loss": 110.1292, "step": 5125, "task_loss": 1.669627070426941 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9889564022606441, "compression/movement_sparsity/importance_threshold": -7.734643002068383e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9139296528483688, "compression/movement_sparsity/model_sparsity": 0.8825333576965558, "compression_loss": 104.8108901977539, "distillation_loss": 4.546747207641602, "epoch": 4.33, "learning_rate": 3.1483046867662254e-05, "loss": 110.0306, "step": 5126, "task_loss": 2.9052999019622803 }, { "compression/movement_sparsity/importance_regularization_factor": 0.988998234025719, "compression/movement_sparsity/importance_threshold": -7.705345143106408e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9139597136749789, "compression/movement_sparsity/model_sparsity": 0.8825623858412939, "compression_loss": 104.81471252441406, "distillation_loss": 3.6730563640594482, "epoch": 4.33, "learning_rate": 3.147835070911994e-05, "loss": 110.2601, "step": 5127, "task_loss": 2.1810712814331055 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9890399600215328, "compression/movement_sparsity/importance_threshold": -7.676121362130018e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9139601071725109, "compression/movement_sparsity/model_sparsity": 0.882562765820975, "compression_loss": 104.8185043334961, "distillation_loss": 5.001622200012207, "epoch": 4.33, "learning_rate": 3.1473654550577634e-05, "loss": 109.085, "step": 5128, "task_loss": 2.9810686111450195 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9890815803819707, "compression/movement_sparsity/importance_threshold": -7.646971565369778e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9139924574393069, "compression/movement_sparsity/model_sparsity": 0.8825940047565857, "compression_loss": 104.82232666015625, "distillation_loss": 5.106436729431152, "epoch": 4.34, "learning_rate": 3.146895839203532e-05, "loss": 108.606, "step": 5129, "task_loss": 3.0757975578308105 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9891230952409178, "compression/movement_sparsity/importance_threshold": -7.61789565905599e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9140427774267302, "compression/movement_sparsity/model_sparsity": 0.8826425960976386, "compression_loss": 104.82608032226562, "distillation_loss": 3.6434803009033203, "epoch": 4.34, "learning_rate": 3.1464262233493e-05, "loss": 108.4439, "step": 5130, "task_loss": 1.9672739505767822 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9891645047322591, "compression/movement_sparsity/importance_threshold": -7.588893549419044e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9140798138914072, "compression/movement_sparsity/model_sparsity": 0.8826783602458165, "compression_loss": 104.82988739013672, "distillation_loss": 3.821112632751465, "epoch": 4.34, "learning_rate": 3.145956607495069e-05, "loss": 108.6267, "step": 5131, "task_loss": 1.8236448764801025 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9892058089898801, "compression/movement_sparsity/importance_threshold": -7.559965142689331e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9141396016679335, "compression/movement_sparsity/model_sparsity": 0.8827360941282902, "compression_loss": 104.83360290527344, "distillation_loss": 6.156956672668457, "epoch": 4.34, "learning_rate": 3.145486991640838e-05, "loss": 110.2244, "step": 5132, "task_loss": 3.2181928157806396 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9892470081476655, "compression/movement_sparsity/importance_threshold": -7.531110345097413e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9141438108991089, "compression/movement_sparsity/model_sparsity": 0.8827401587594258, "compression_loss": 104.83731079101562, "distillation_loss": 4.476194381713867, "epoch": 4.34, "learning_rate": 3.145017375786607e-05, "loss": 108.8248, "step": 5133, "task_loss": 2.3508927822113037 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9892881023395008, "compression/movement_sparsity/importance_threshold": -7.50232906287342e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9140853228568551, "compression/movement_sparsity/model_sparsity": 0.8826836799613536, "compression_loss": 104.84107208251953, "distillation_loss": 4.446636199951172, "epoch": 4.34, "learning_rate": 3.144547759932375e-05, "loss": 109.9789, "step": 5134, "task_loss": 1.7110819816589355 }, { "compression/movement_sparsity/importance_regularization_factor": 0.989329091699271, "compression/movement_sparsity/importance_threshold": -7.473621202247916e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9140806008864713, "compression/movement_sparsity/model_sparsity": 0.8826791202051789, "compression_loss": 104.84476470947266, "distillation_loss": 2.5562925338745117, "epoch": 4.34, "learning_rate": 3.1440781440781445e-05, "loss": 108.2313, "step": 5135, "task_loss": 1.7336677312850952 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9893699763608611, "compression/movement_sparsity/importance_threshold": -7.44498666945129e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.914067639316251, "compression/movement_sparsity/model_sparsity": 0.8826666039047703, "compression_loss": 104.84848022460938, "distillation_loss": 4.606205463409424, "epoch": 4.34, "learning_rate": 3.143608528223913e-05, "loss": 108.8274, "step": 5136, "task_loss": 2.0235323905944824 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9894107564581565, "compression/movement_sparsity/importance_threshold": -7.416425370713933e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9141178519861657, "compression/movement_sparsity/model_sparsity": 0.8827150916150011, "compression_loss": 104.85218048095703, "distillation_loss": 5.713028907775879, "epoch": 4.34, "learning_rate": 3.143138912369682e-05, "loss": 108.8177, "step": 5137, "task_loss": 2.088653802871704 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9894514321250422, "compression/movement_sparsity/importance_threshold": -7.387937212266146e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.914157988734428, "compression/movement_sparsity/model_sparsity": 0.8827538495424855, "compression_loss": 104.85587310791016, "distillation_loss": 4.470807075500488, "epoch": 4.34, "learning_rate": 3.1426692965154504e-05, "loss": 109.1675, "step": 5138, "task_loss": 2.808851957321167 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9894920034954032, "compression/movement_sparsity/importance_threshold": -7.359522100338581e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9141779378668828, "compression/movement_sparsity/model_sparsity": 0.8827731133608697, "compression_loss": 104.8595962524414, "distillation_loss": 3.6586737632751465, "epoch": 4.34, "learning_rate": 3.142199680661219e-05, "loss": 108.6665, "step": 5139, "task_loss": 2.053719997406006 }, { "compression/movement_sparsity/importance_regularization_factor": 0.989532470703125, "compression/movement_sparsity/importance_threshold": -7.331179941161281e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9142161905966586, "compression/movement_sparsity/model_sparsity": 0.8828100519916986, "compression_loss": 104.86319732666016, "distillation_loss": 3.2243106365203857, "epoch": 4.34, "learning_rate": 3.141730064806988e-05, "loss": 108.8292, "step": 5140, "task_loss": 1.8831080198287964 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9895728338820925, "compression/movement_sparsity/importance_threshold": -7.302910640964895e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9142782916617063, "compression/movement_sparsity/model_sparsity": 0.8828700196941166, "compression_loss": 104.86686706542969, "distillation_loss": 3.329256534576416, "epoch": 4.35, "learning_rate": 3.141260448952757e-05, "loss": 108.6191, "step": 5141, "task_loss": 2.3088643550872803 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9896130931661908, "compression/movement_sparsity/importance_threshold": -7.274714105979727e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.91426172899286, "compression/movement_sparsity/model_sparsity": 0.882854026003898, "compression_loss": 104.87046813964844, "distillation_loss": 4.86141300201416, "epoch": 4.35, "learning_rate": 3.1407908330985256e-05, "loss": 109.3277, "step": 5142, "task_loss": 2.388437032699585 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9896532486893052, "compression/movement_sparsity/importance_threshold": -7.246590242436252e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9142341364689506, "compression/movement_sparsity/model_sparsity": 0.8828273813680694, "compression_loss": 104.87413787841797, "distillation_loss": 4.613224029541016, "epoch": 4.35, "learning_rate": 3.140321217244294e-05, "loss": 108.9939, "step": 5143, "task_loss": 3.561182737350464 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9896933005853207, "compression/movement_sparsity/importance_threshold": -7.218538956564862e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9142767295957459, "compression/movement_sparsity/model_sparsity": 0.8828685112899275, "compression_loss": 104.877685546875, "distillation_loss": 4.259998321533203, "epoch": 4.35, "learning_rate": 3.139851601390063e-05, "loss": 108.591, "step": 5144, "task_loss": 2.5158400535583496 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9897332489881225, "compression/movement_sparsity/importance_threshold": -7.190560154595772e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9143181541541129, "compression/movement_sparsity/model_sparsity": 0.8829085127872778, "compression_loss": 104.88125610351562, "distillation_loss": 4.373218536376953, "epoch": 4.35, "learning_rate": 3.139381985535832e-05, "loss": 108.7652, "step": 5145, "task_loss": 2.8389768600463867 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9897730940315957, "compression/movement_sparsity/importance_threshold": -7.162653742759633e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9143566095947385, "compression/movement_sparsity/model_sparsity": 0.8829456471652152, "compression_loss": 104.88475799560547, "distillation_loss": 5.307328224182129, "epoch": 4.35, "learning_rate": 3.138912369681601e-05, "loss": 109.1314, "step": 5146, "task_loss": 2.4835269451141357 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9898128358496256, "compression/movement_sparsity/importance_threshold": -7.134819627286661e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.914423921521043, "compression/movement_sparsity/model_sparsity": 0.8830106467197754, "compression_loss": 104.8883056640625, "distillation_loss": 3.563798427581787, "epoch": 4.35, "learning_rate": 3.1384427538273694e-05, "loss": 108.8791, "step": 5147, "task_loss": 1.530679702758789 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9898524745760973, "compression/movement_sparsity/importance_threshold": -7.107057714407332e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9144690544955447, "compression/movement_sparsity/model_sparsity": 0.8830542292377577, "compression_loss": 104.89183044433594, "distillation_loss": 2.8608226776123047, "epoch": 4.35, "learning_rate": 3.137973137973138e-05, "loss": 108.3913, "step": 5148, "task_loss": 1.624121069908142 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9898920103448957, "compression/movement_sparsity/importance_threshold": -7.079367910352036e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9144665861928442, "compression/movement_sparsity/model_sparsity": 0.8830518457288482, "compression_loss": 104.89539337158203, "distillation_loss": 3.357325792312622, "epoch": 4.35, "learning_rate": 3.137503522118907e-05, "loss": 108.7136, "step": 5149, "task_loss": 1.8940143585205078 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9899314432899061, "compression/movement_sparsity/importance_threshold": -7.051750121351164e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9145318829348181, "compression/movement_sparsity/model_sparsity": 0.883114899326859, "compression_loss": 104.89884948730469, "distillation_loss": 3.313788890838623, "epoch": 4.35, "learning_rate": 3.137033906264676e-05, "loss": 108.6425, "step": 5150, "task_loss": 1.9851895570755005 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9899707735450137, "compression/movement_sparsity/importance_threshold": -7.024204253635105e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9145992664061283, "compression/movement_sparsity/model_sparsity": 0.8831799679686341, "compression_loss": 104.9023666381836, "distillation_loss": 4.493631362915039, "epoch": 4.35, "learning_rate": 3.136564290410444e-05, "loss": 109.3503, "step": 5151, "task_loss": 2.578348159790039 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9900100012441035, "compression/movement_sparsity/importance_threshold": -6.996730213434336e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9146714195444929, "compression/movement_sparsity/model_sparsity": 0.8832496424247268, "compression_loss": 104.90584564208984, "distillation_loss": 3.491774797439575, "epoch": 4.35, "learning_rate": 3.136094674556213e-05, "loss": 109.0113, "step": 5152, "task_loss": 2.686933755874634 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9900491265210609, "compression/movement_sparsity/importance_threshold": -6.969327906979073e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9147356431113798, "compression/movement_sparsity/model_sparsity": 0.8833116597145162, "compression_loss": 104.90933227539062, "distillation_loss": 4.047588348388672, "epoch": 4.36, "learning_rate": 3.135625058701982e-05, "loss": 108.8499, "step": 5153, "task_loss": 2.6929171085357666 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9900881495097708, "compression/movement_sparsity/importance_threshold": -6.941997240499879e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9148346494752603, "compression/movement_sparsity/model_sparsity": 0.8834072649052183, "compression_loss": 104.91276550292969, "distillation_loss": 3.424588918685913, "epoch": 4.36, "learning_rate": 3.1351554428477506e-05, "loss": 108.1409, "step": 5154, "task_loss": 2.34260892868042 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9901270703441183, "compression/movement_sparsity/importance_threshold": -6.914738120227144e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9148297247940267, "compression/movement_sparsity/model_sparsity": 0.8834025094019351, "compression_loss": 104.91621398925781, "distillation_loss": 3.7153120040893555, "epoch": 4.36, "learning_rate": 3.134685826993519e-05, "loss": 109.1966, "step": 5155, "task_loss": 2.24242901802063 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9901658891579888, "compression/movement_sparsity/importance_threshold": -6.887550452391172e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9148127805518161, "compression/movement_sparsity/model_sparsity": 0.883386147246571, "compression_loss": 104.91961669921875, "distillation_loss": 3.4880757331848145, "epoch": 4.36, "learning_rate": 3.134216211139288e-05, "loss": 109.0609, "step": 5156, "task_loss": 1.8820438385009766 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9902046060852672, "compression/movement_sparsity/importance_threshold": -6.86043414322244e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9147581797882115, "compression/movement_sparsity/model_sparsity": 0.8833334221871679, "compression_loss": 104.92298889160156, "distillation_loss": 3.808260440826416, "epoch": 4.36, "learning_rate": 3.133746595285057e-05, "loss": 109.3731, "step": 5157, "task_loss": 2.200307846069336 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9902432212598387, "compression/movement_sparsity/importance_threshold": -6.833389098951335e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9147513710884915, "compression/movement_sparsity/model_sparsity": 0.8833268473872292, "compression_loss": 104.92642974853516, "distillation_loss": 3.0494039058685303, "epoch": 4.36, "learning_rate": 3.133276979430826e-05, "loss": 108.7687, "step": 5158, "task_loss": 1.3738709688186646 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9902817348155885, "compression/movement_sparsity/importance_threshold": -6.80641522580825e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9147351899930096, "compression/movement_sparsity/model_sparsity": 0.883311222162156, "compression_loss": 104.92986297607422, "distillation_loss": 3.6871981620788574, "epoch": 4.36, "learning_rate": 3.132807363576595e-05, "loss": 109.1436, "step": 5159, "task_loss": 2.0332021713256836 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9903201468864018, "compression/movement_sparsity/importance_threshold": -6.779512430023487e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9148335524518378, "compression/movement_sparsity/model_sparsity": 0.8834062055679252, "compression_loss": 104.93321990966797, "distillation_loss": 3.729799747467041, "epoch": 4.36, "learning_rate": 3.132337747722363e-05, "loss": 108.651, "step": 5160, "task_loss": 2.0735132694244385 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9903584576061635, "compression/movement_sparsity/importance_threshold": -6.752680617827609e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9148377855313485, "compression/movement_sparsity/model_sparsity": 0.8834102932281322, "compression_loss": 104.9365463256836, "distillation_loss": 4.633014678955078, "epoch": 4.36, "learning_rate": 3.131868131868132e-05, "loss": 109.3911, "step": 5161, "task_loss": 2.3478262424468994 }, { "compression/movement_sparsity/importance_regularization_factor": 0.990396667108759, "compression/movement_sparsity/importance_threshold": -6.72591969545092e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9148527384375639, "compression/movement_sparsity/model_sparsity": 0.8834247324560186, "compression_loss": 104.93992614746094, "distillation_loss": 5.938641548156738, "epoch": 4.36, "learning_rate": 3.131398516013901e-05, "loss": 109.9306, "step": 5162, "task_loss": 2.2888729572296143 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9904347755280732, "compression/movement_sparsity/importance_threshold": -6.699229569123808e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9149201457572095, "compression/movement_sparsity/model_sparsity": 0.8834898241268652, "compression_loss": 104.94325256347656, "distillation_loss": 4.739206314086914, "epoch": 4.36, "learning_rate": 3.1309289001596696e-05, "loss": 109.4739, "step": 5163, "task_loss": 2.029761552810669 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9904727829979916, "compression/movement_sparsity/importance_threshold": -6.672610145076665e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9149523052373234, "compression/movement_sparsity/model_sparsity": 0.883520878829903, "compression_loss": 104.94660949707031, "distillation_loss": 5.401961326599121, "epoch": 4.36, "learning_rate": 3.130459284305438e-05, "loss": 110.1893, "step": 5164, "task_loss": 3.1372413635253906 }, { "compression/movement_sparsity/importance_regularization_factor": 0.990510689652399, "compression/movement_sparsity/importance_threshold": -6.64606132953988e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9150199271919863, "compression/movement_sparsity/model_sparsity": 0.8835861777623939, "compression_loss": 104.94989776611328, "distillation_loss": 4.2096662521362305, "epoch": 4.37, "learning_rate": 3.129989668451207e-05, "loss": 108.9104, "step": 5165, "task_loss": 1.871734380722046 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9905484956251805, "compression/movement_sparsity/importance_threshold": -6.619583028744016e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9150929746429236, "compression/movement_sparsity/model_sparsity": 0.8836567158086712, "compression_loss": 104.95315551757812, "distillation_loss": 4.093985557556152, "epoch": 4.37, "learning_rate": 3.129520052596976e-05, "loss": 109.2454, "step": 5166, "task_loss": 2.859217405319214 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9905862010502215, "compression/movement_sparsity/importance_threshold": -6.59317514891929e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9151467407147937, "compression/movement_sparsity/model_sparsity": 0.8837086348505688, "compression_loss": 104.95645904541016, "distillation_loss": 5.142999649047852, "epoch": 4.37, "learning_rate": 3.129050436742745e-05, "loss": 109.0465, "step": 5167, "task_loss": 3.2797698974609375 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9906238060614072, "compression/movement_sparsity/importance_threshold": -6.566837596296091e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9151496621358646, "compression/movement_sparsity/model_sparsity": 0.8837114559118384, "compression_loss": 104.9597396850586, "distillation_loss": 3.6974377632141113, "epoch": 4.37, "learning_rate": 3.128580820888513e-05, "loss": 108.6989, "step": 5168, "task_loss": 1.7171962261199951 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9906613107926224, "compression/movement_sparsity/importance_threshold": -6.540570277104897e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9151798779766538, "compression/movement_sparsity/model_sparsity": 0.8837406337455418, "compression_loss": 104.96306610107422, "distillation_loss": 4.341638088226318, "epoch": 4.37, "learning_rate": 3.128111205034282e-05, "loss": 109.1302, "step": 5169, "task_loss": 2.4382357597351074 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9906987153777524, "compression/movement_sparsity/importance_threshold": -6.514373097576184e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.915240190416556, "compression/movement_sparsity/model_sparsity": 0.8837988742675906, "compression_loss": 104.96639251708984, "distillation_loss": 5.1464691162109375, "epoch": 4.37, "learning_rate": 3.127641589180051e-05, "loss": 109.8384, "step": 5170, "task_loss": 2.981036901473999 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9907360199506825, "compression/movement_sparsity/importance_threshold": -6.488245963940167e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9152430879892915, "compression/movement_sparsity/model_sparsity": 0.8838016722997887, "compression_loss": 104.96966552734375, "distillation_loss": 5.598273277282715, "epoch": 4.37, "learning_rate": 3.12717197332582e-05, "loss": 109.2275, "step": 5171, "task_loss": 3.0510916709899902 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9907732246452976, "compression/movement_sparsity/importance_threshold": -6.462188782427412e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9153133332608344, "compression/movement_sparsity/model_sparsity": 0.8838695044301543, "compression_loss": 104.97297668457031, "distillation_loss": 4.7313032150268555, "epoch": 4.37, "learning_rate": 3.126702357471588e-05, "loss": 109.1807, "step": 5172, "task_loss": 2.4155080318450928 }, { "compression/movement_sparsity/importance_regularization_factor": 0.990810329595483, "compression/movement_sparsity/importance_threshold": -6.436201459268133e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9153414027514493, "compression/movement_sparsity/model_sparsity": 0.8838966096474147, "compression_loss": 104.97627258300781, "distillation_loss": 5.187762260437012, "epoch": 4.37, "learning_rate": 3.126232741617357e-05, "loss": 109.5827, "step": 5173, "task_loss": 3.240830659866333 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9908473349351238, "compression/movement_sparsity/importance_threshold": -6.410283900692808e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9153681367352888, "compression/movement_sparsity/model_sparsity": 0.883922425236666, "compression_loss": 104.97953033447266, "distillation_loss": 4.195384979248047, "epoch": 4.37, "learning_rate": 3.125763125763126e-05, "loss": 109.0365, "step": 5174, "task_loss": 2.2656729221343994 }, { "compression/movement_sparsity/importance_regularization_factor": 0.990884240798105, "compression/movement_sparsity/importance_threshold": -6.384436012931913e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9153929986248096, "compression/movement_sparsity/model_sparsity": 0.8839464330437976, "compression_loss": 104.98286437988281, "distillation_loss": 3.8446359634399414, "epoch": 4.37, "learning_rate": 3.1252935099088946e-05, "loss": 108.9258, "step": 5175, "task_loss": 2.444943904876709 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9909210473183119, "compression/movement_sparsity/importance_threshold": -6.358657702215838e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9154103005920492, "compression/movement_sparsity/model_sparsity": 0.8839631406352355, "compression_loss": 104.98612976074219, "distillation_loss": 5.214948654174805, "epoch": 4.38, "learning_rate": 3.124823894054664e-05, "loss": 109.741, "step": 5176, "task_loss": 3.891021966934204 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9909577546296297, "compression/movement_sparsity/importance_threshold": -6.332948874774886e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9154084165735628, "compression/movement_sparsity/model_sparsity": 0.88396132133858, "compression_loss": 104.98937225341797, "distillation_loss": 5.145688533782959, "epoch": 4.38, "learning_rate": 3.124354278200432e-05, "loss": 109.879, "step": 5177, "task_loss": 2.7917745113372803 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9909943628659433, "compression/movement_sparsity/importance_threshold": -6.307309436839447e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9154349239982172, "compression/movement_sparsity/model_sparsity": 0.8839869181516512, "compression_loss": 104.99263000488281, "distillation_loss": 4.5916900634765625, "epoch": 4.38, "learning_rate": 3.123884662346201e-05, "loss": 109.2796, "step": 5178, "task_loss": 3.4848484992980957 }, { "compression/movement_sparsity/importance_regularization_factor": 0.991030872161138, "compression/movement_sparsity/importance_threshold": -6.281739294640085e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9154578422484134, "compression/movement_sparsity/model_sparsity": 0.8840090490894483, "compression_loss": 104.9958724975586, "distillation_loss": 4.382300853729248, "epoch": 4.38, "learning_rate": 3.12341504649197e-05, "loss": 109.7403, "step": 5179, "task_loss": 3.206920623779297 }, { "compression/movement_sparsity/importance_regularization_factor": 0.991067282649099, "compression/movement_sparsity/importance_threshold": -6.256238354407015e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9155544160820963, "compression/movement_sparsity/model_sparsity": 0.8841023053148483, "compression_loss": 104.99909210205078, "distillation_loss": 4.8595781326293945, "epoch": 4.38, "learning_rate": 3.1229454306377384e-05, "loss": 109.1354, "step": 5180, "task_loss": 3.020352840423584 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9911035944637113, "compression/movement_sparsity/importance_threshold": -6.230806522370715e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9156044498894964, "compression/movement_sparsity/model_sparsity": 0.8841506203070423, "compression_loss": 105.00225067138672, "distillation_loss": 5.12705135345459, "epoch": 4.38, "learning_rate": 3.122475814783507e-05, "loss": 108.9351, "step": 5181, "task_loss": 2.468276023864746 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9911398077388601, "compression/movement_sparsity/importance_threshold": -6.20544370476166e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9156905423798273, "compression/movement_sparsity/model_sparsity": 0.8842337552554789, "compression_loss": 105.0054931640625, "distillation_loss": 3.2620387077331543, "epoch": 4.38, "learning_rate": 3.122006198929276e-05, "loss": 109.459, "step": 5182, "task_loss": 2.3089542388916016 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9911759226084307, "compression/movement_sparsity/importance_threshold": -6.180149807809981e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9157700646537908, "compression/movement_sparsity/model_sparsity": 0.8843105456946926, "compression_loss": 105.00869750976562, "distillation_loss": 3.813184976577759, "epoch": 4.38, "learning_rate": 3.121536583075045e-05, "loss": 108.9346, "step": 5183, "task_loss": 2.378330707550049 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9912119392063079, "compression/movement_sparsity/importance_threshold": -6.154924737746415e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9157795205187261, "compression/movement_sparsity/model_sparsity": 0.8843196767215776, "compression_loss": 105.01184844970703, "distillation_loss": 3.9441606998443604, "epoch": 4.38, "learning_rate": 3.1210669672208136e-05, "loss": 109.1843, "step": 5184, "task_loss": 2.789583444595337 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9912478576663771, "compression/movement_sparsity/importance_threshold": -6.129768400801091e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9157608234238731, "compression/movement_sparsity/model_sparsity": 0.8843016219294518, "compression_loss": 105.01504516601562, "distillation_loss": 4.102852821350098, "epoch": 4.38, "learning_rate": 3.120597351366582e-05, "loss": 109.4644, "step": 5185, "task_loss": 2.080470323562622 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9912836781225234, "compression/movement_sparsity/importance_threshold": -6.104680703204659e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9158012821246615, "compression/movement_sparsity/model_sparsity": 0.8843406907494027, "compression_loss": 105.01820373535156, "distillation_loss": 2.7837815284729004, "epoch": 4.38, "learning_rate": 3.120127735512351e-05, "loss": 109.0451, "step": 5186, "task_loss": 2.034298896789551 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9913194007086319, "compression/movement_sparsity/importance_threshold": -6.079661551187249e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.915877990295063, "compression/movement_sparsity/model_sparsity": 0.884414763758169, "compression_loss": 105.02132415771484, "distillation_loss": 5.289899826049805, "epoch": 4.38, "learning_rate": 3.1196581196581195e-05, "loss": 109.0506, "step": 5187, "task_loss": 2.0994086265563965 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9913550255585877, "compression/movement_sparsity/importance_threshold": -6.054710850979424e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9159320783194593, "compression/movement_sparsity/model_sparsity": 0.884466993692533, "compression_loss": 105.02450561523438, "distillation_loss": 2.5548782348632812, "epoch": 4.39, "learning_rate": 3.119188503803889e-05, "loss": 108.5132, "step": 5188, "task_loss": 1.4657349586486816 }, { "compression/movement_sparsity/importance_regularization_factor": 0.991390552806276, "compression/movement_sparsity/importance_threshold": -6.029828508811574e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.916018111188952, "compression/movement_sparsity/model_sparsity": 0.8845500710682906, "compression_loss": 105.0275650024414, "distillation_loss": 2.1204538345336914, "epoch": 4.39, "learning_rate": 3.118718887949657e-05, "loss": 108.6126, "step": 5189, "task_loss": 2.1900856494903564 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9914259825855819, "compression/movement_sparsity/importance_threshold": -6.005014430914002e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9160028721027135, "compression/movement_sparsity/model_sparsity": 0.8845353554915453, "compression_loss": 105.0306396484375, "distillation_loss": 5.179567337036133, "epoch": 4.39, "learning_rate": 3.118249272095426e-05, "loss": 109.7562, "step": 5190, "task_loss": 2.449612855911255 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9914613150303906, "compression/movement_sparsity/importance_threshold": -5.980268523517185e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9159807408475813, "compression/movement_sparsity/model_sparsity": 0.8845139845131106, "compression_loss": 105.03373718261719, "distillation_loss": 3.9097352027893066, "epoch": 4.39, "learning_rate": 3.117779656241195e-05, "loss": 108.818, "step": 5191, "task_loss": 1.9897174835205078 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9914965502745872, "compression/movement_sparsity/importance_threshold": -5.955590692851599e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9159594085116807, "compression/movement_sparsity/model_sparsity": 0.8844933850085741, "compression_loss": 105.036865234375, "distillation_loss": 5.177759170532227, "epoch": 4.39, "learning_rate": 3.1173100403869634e-05, "loss": 109.3945, "step": 5192, "task_loss": 3.0190560817718506 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9915316884520569, "compression/movement_sparsity/importance_threshold": -5.930980845147461e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9160386326814534, "compression/movement_sparsity/model_sparsity": 0.884569887584393, "compression_loss": 105.03993225097656, "distillation_loss": 4.347969055175781, "epoch": 4.39, "learning_rate": 3.116840424532733e-05, "loss": 108.6545, "step": 5193, "task_loss": 2.2191271781921387 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9915667296966847, "compression/movement_sparsity/importance_threshold": -5.906438886635248e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9160528939859459, "compression/movement_sparsity/model_sparsity": 0.8845836589692033, "compression_loss": 105.04303741455078, "distillation_loss": 5.541364669799805, "epoch": 4.39, "learning_rate": 3.116370808678501e-05, "loss": 109.5767, "step": 5194, "task_loss": 2.881110906600952 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9916016741423559, "compression/movement_sparsity/importance_threshold": -5.8819647235453486e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9160739043693202, "compression/movement_sparsity/model_sparsity": 0.8846039475812733, "compression_loss": 105.04610443115234, "distillation_loss": 3.384204387664795, "epoch": 4.39, "learning_rate": 3.11590119282427e-05, "loss": 108.9597, "step": 5195, "task_loss": 1.069176197052002 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9916365219229556, "compression/movement_sparsity/importance_threshold": -5.85755826210824e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9160279963239222, "compression/movement_sparsity/model_sparsity": 0.8845596166184643, "compression_loss": 105.04917907714844, "distillation_loss": 4.428487300872803, "epoch": 4.39, "learning_rate": 3.1154315769700386e-05, "loss": 109.1245, "step": 5196, "task_loss": 2.1247103214263916 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9916712731723689, "compression/movement_sparsity/importance_threshold": -5.833219408554226e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9160003441791746, "compression/movement_sparsity/model_sparsity": 0.8845329144099567, "compression_loss": 105.05216979980469, "distillation_loss": 6.729247570037842, "epoch": 4.39, "learning_rate": 3.114961961115808e-05, "loss": 109.5855, "step": 5197, "task_loss": 3.328348398208618 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9917059280244809, "compression/movement_sparsity/importance_threshold": -5.808948069113782e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.916006652063854, "compression/movement_sparsity/model_sparsity": 0.8845390055993921, "compression_loss": 105.05521392822266, "distillation_loss": 2.5887954235076904, "epoch": 4.39, "learning_rate": 3.114492345261576e-05, "loss": 109.094, "step": 5198, "task_loss": 1.767388105392456 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9917404866131768, "compression/movement_sparsity/importance_threshold": -5.784744150017212e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.91606795420967, "compression/movement_sparsity/model_sparsity": 0.8845982018279118, "compression_loss": 105.05825805664062, "distillation_loss": 4.55316686630249, "epoch": 4.39, "learning_rate": 3.114022729407345e-05, "loss": 109.1969, "step": 5199, "task_loss": 3.254136562347412 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9917749490723418, "compression/movement_sparsity/importance_threshold": -5.760607557494992e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9161305560897582, "compression/movement_sparsity/model_sparsity": 0.8846586531408331, "compression_loss": 105.06127166748047, "distillation_loss": 3.6371212005615234, "epoch": 4.4, "learning_rate": 3.113553113553114e-05, "loss": 109.2471, "step": 5200, "task_loss": 1.6408169269561768 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9918093155358609, "compression/movement_sparsity/importance_threshold": -5.736538197777426e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9161105592606329, "compression/movement_sparsity/model_sparsity": 0.8846393432643057, "compression_loss": 105.06424713134766, "distillation_loss": 6.0265398025512695, "epoch": 4.4, "learning_rate": 3.1130834976988825e-05, "loss": 109.7745, "step": 5201, "task_loss": 3.2487926483154297 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9918435861376194, "compression/movement_sparsity/importance_threshold": -5.712535977095076e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9161418602006771, "compression/movement_sparsity/model_sparsity": 0.8846695689207664, "compression_loss": 105.0672836303711, "distillation_loss": 4.190788269042969, "epoch": 4.4, "learning_rate": 3.112613881844651e-05, "loss": 109.7062, "step": 5202, "task_loss": 1.9235881567001343 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9918777610115023, "compression/movement_sparsity/importance_threshold": -5.68860080167816e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.916117463353694, "compression/movement_sparsity/model_sparsity": 0.8846460101805308, "compression_loss": 105.0703125, "distillation_loss": 5.201738357543945, "epoch": 4.4, "learning_rate": 3.11214426599042e-05, "loss": 108.7223, "step": 5203, "task_loss": 2.6606781482696533 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9919118402913948, "compression/movement_sparsity/importance_threshold": -5.664732577757153e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9161768099360178, "compression/movement_sparsity/model_sparsity": 0.8847033180251802, "compression_loss": 105.07328033447266, "distillation_loss": 4.3482561111450195, "epoch": 4.4, "learning_rate": 3.111674650136189e-05, "loss": 108.881, "step": 5204, "task_loss": 2.82692551612854 }, { "compression/movement_sparsity/importance_regularization_factor": 0.991945824111182, "compression/movement_sparsity/importance_threshold": -5.640931211562533e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9161644803466823, "compression/movement_sparsity/model_sparsity": 0.8846914119951687, "compression_loss": 105.07627868652344, "distillation_loss": 4.290942668914795, "epoch": 4.4, "learning_rate": 3.111205034281958e-05, "loss": 109.1719, "step": 5205, "task_loss": 2.9466912746429443 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9919797126047492, "compression/movement_sparsity/importance_threshold": -5.617196609324516e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9162554617457439, "compression/movement_sparsity/model_sparsity": 0.884779267903281, "compression_loss": 105.07929229736328, "distillation_loss": 4.758189678192139, "epoch": 4.4, "learning_rate": 3.110735418427726e-05, "loss": 110.1907, "step": 5206, "task_loss": 2.4261672496795654 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9920135059059813, "compression/movement_sparsity/importance_threshold": -5.5935286772737514e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9162495354344289, "compression/movement_sparsity/model_sparsity": 0.8847735451789911, "compression_loss": 105.08228302001953, "distillation_loss": 4.856335639953613, "epoch": 4.4, "learning_rate": 3.110265802573495e-05, "loss": 109.4997, "step": 5207, "task_loss": 2.2679269313812256 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9920472041487636, "compression/movement_sparsity/importance_threshold": -5.5699273216403694e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9162789404318189, "compression/movement_sparsity/model_sparsity": 0.8848019400242605, "compression_loss": 105.08528137207031, "distillation_loss": 4.781373500823975, "epoch": 4.4, "learning_rate": 3.1097961867192636e-05, "loss": 109.3018, "step": 5208, "task_loss": 3.045623779296875 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9920808074669812, "compression/movement_sparsity/importance_threshold": -5.5463924486548466e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9162843659280933, "compression/movement_sparsity/model_sparsity": 0.884807179138047, "compression_loss": 105.08821868896484, "distillation_loss": 4.957886695861816, "epoch": 4.4, "learning_rate": 3.109326570865033e-05, "loss": 109.6677, "step": 5209, "task_loss": 2.168710708618164 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9921143159945193, "compression/movement_sparsity/importance_threshold": -5.522923964547746e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9162806336636232, "compression/movement_sparsity/model_sparsity": 0.8848035750883433, "compression_loss": 105.09123229980469, "distillation_loss": 3.7046523094177246, "epoch": 4.4, "learning_rate": 3.1088569550108015e-05, "loss": 109.2973, "step": 5210, "task_loss": 2.2956881523132324 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9921477298652629, "compression/movement_sparsity/importance_threshold": -5.499521775549285e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.916322392098684, "compression/movement_sparsity/model_sparsity": 0.8848438989926958, "compression_loss": 105.09418487548828, "distillation_loss": 4.785365581512451, "epoch": 4.4, "learning_rate": 3.10838733915657e-05, "loss": 108.9035, "step": 5211, "task_loss": 1.9259170293807983 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9921810492130972, "compression/movement_sparsity/importance_threshold": -5.476185787889939e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9163803197050591, "compression/movement_sparsity/model_sparsity": 0.8848998366075856, "compression_loss": 105.09716796875, "distillation_loss": 3.4944117069244385, "epoch": 4.41, "learning_rate": 3.107917723302339e-05, "loss": 109.2491, "step": 5212, "task_loss": 2.2266061305999756 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9922142741719074, "compression/movement_sparsity/importance_threshold": -5.452915907800098e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9164475123896871, "compression/movement_sparsity/model_sparsity": 0.8849647210167879, "compression_loss": 105.10005950927734, "distillation_loss": 4.810935020446777, "epoch": 4.41, "learning_rate": 3.1074481074481074e-05, "loss": 109.4236, "step": 5213, "task_loss": 1.8845292329788208 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9922474048755787, "compression/movement_sparsity/importance_threshold": -5.429712041510066e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9164710387724327, "compression/movement_sparsity/model_sparsity": 0.8849874391959105, "compression_loss": 105.10299682617188, "distillation_loss": 5.648877143859863, "epoch": 4.41, "learning_rate": 3.106978491593877e-05, "loss": 108.8446, "step": 5214, "task_loss": 3.403153419494629 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9922804414579961, "compression/movement_sparsity/importance_threshold": -5.4065740952504056e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9165145739084712, "compression/movement_sparsity/model_sparsity": 0.8850294787660964, "compression_loss": 105.10582733154297, "distillation_loss": 4.363511085510254, "epoch": 4.41, "learning_rate": 3.106508875739645e-05, "loss": 109.3709, "step": 5215, "task_loss": 3.137528896331787 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9923133840530447, "compression/movement_sparsity/importance_threshold": -5.38350197525142e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9164697509623281, "compression/movement_sparsity/model_sparsity": 0.8849861956260446, "compression_loss": 105.10870361328125, "distillation_loss": 4.313808441162109, "epoch": 4.41, "learning_rate": 3.106039259885414e-05, "loss": 109.1564, "step": 5216, "task_loss": 1.8479979038238525 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9923462327946098, "compression/movement_sparsity/importance_threshold": -5.360495587743586e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9164773943537826, "compression/movement_sparsity/model_sparsity": 0.884993576443489, "compression_loss": 105.11153411865234, "distillation_loss": 5.190854072570801, "epoch": 4.41, "learning_rate": 3.1055696440311826e-05, "loss": 109.4673, "step": 5217, "task_loss": 3.533594846725464 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9923789878165764, "compression/movement_sparsity/importance_threshold": -5.3375548389571204e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9164991917322209, "compression/movement_sparsity/model_sparsity": 0.8850146250149215, "compression_loss": 105.1143798828125, "distillation_loss": 4.179282188415527, "epoch": 4.41, "learning_rate": 3.105100028176951e-05, "loss": 109.0415, "step": 5218, "task_loss": 3.01669979095459 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9924116492528298, "compression/movement_sparsity/importance_threshold": -5.314679635122586e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9165469480236026, "compression/movement_sparsity/model_sparsity": 0.8850607407307786, "compression_loss": 105.11717224121094, "distillation_loss": 3.131842851638794, "epoch": 4.41, "learning_rate": 3.10463041232272e-05, "loss": 108.5636, "step": 5219, "task_loss": 2.3780667781829834 }, { "compression/movement_sparsity/importance_regularization_factor": 0.992444217237255, "compression/movement_sparsity/importance_threshold": -5.291869882470372e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9165299322363862, "compression/movement_sparsity/model_sparsity": 0.8850443094881997, "compression_loss": 105.11996459960938, "distillation_loss": 5.197847366333008, "epoch": 4.41, "learning_rate": 3.1041607964684885e-05, "loss": 109.2749, "step": 5220, "task_loss": 2.126046657562256 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9924766919037372, "compression/movement_sparsity/importance_threshold": -5.269125487230696e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9165645242466979, "compression/movement_sparsity/model_sparsity": 0.8850777131565397, "compression_loss": 105.12274169921875, "distillation_loss": 4.998417377471924, "epoch": 4.41, "learning_rate": 3.103691180614258e-05, "loss": 109.4562, "step": 5221, "task_loss": 3.4918718338012695 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9925090733861615, "compression/movement_sparsity/importance_threshold": -5.246446355634208e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9165749459692116, "compression/movement_sparsity/model_sparsity": 0.8850877768608241, "compression_loss": 105.12553405761719, "distillation_loss": 4.365262031555176, "epoch": 4.41, "learning_rate": 3.1032215647600265e-05, "loss": 108.9588, "step": 5222, "task_loss": 2.1591427326202393 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9925413618184131, "compression/movement_sparsity/importance_threshold": -5.2238323939111236e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9165188546846526, "compression/movement_sparsity/model_sparsity": 0.8850336124844466, "compression_loss": 105.1282958984375, "distillation_loss": 3.4839558601379395, "epoch": 4.41, "learning_rate": 3.102751948905796e-05, "loss": 109.4647, "step": 5223, "task_loss": 1.432780146598816 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9925735573343771, "compression/movement_sparsity/importance_threshold": -5.20128350829192e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9165048080151774, "compression/movement_sparsity/model_sparsity": 0.8850200483612807, "compression_loss": 105.13108825683594, "distillation_loss": 2.9023187160491943, "epoch": 4.42, "learning_rate": 3.102282333051564e-05, "loss": 108.5372, "step": 5224, "task_loss": 1.7747489213943481 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9926056600679386, "compression/movement_sparsity/importance_threshold": -5.1787996050070735e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9165949189500017, "compression/movement_sparsity/model_sparsity": 0.88510706370828, "compression_loss": 105.13382720947266, "distillation_loss": 4.6007585525512695, "epoch": 4.42, "learning_rate": 3.1018127171973324e-05, "loss": 109.1657, "step": 5225, "task_loss": 2.361626148223877 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9926376701529829, "compression/movement_sparsity/importance_threshold": -5.156380590286801e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9166634829139079, "compression/movement_sparsity/model_sparsity": 0.8851732722890986, "compression_loss": 105.13655090332031, "distillation_loss": 3.852369546890259, "epoch": 4.42, "learning_rate": 3.101343101343102e-05, "loss": 108.9645, "step": 5226, "task_loss": 2.1826553344726562 }, { "compression/movement_sparsity/importance_regularization_factor": 0.992669587723395, "compression/movement_sparsity/importance_threshold": -5.134026370361665e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9166910158169791, "compression/movement_sparsity/model_sparsity": 0.8851998593522482, "compression_loss": 105.13926696777344, "distillation_loss": 4.546185493469238, "epoch": 4.42, "learning_rate": 3.10087348548887e-05, "loss": 108.9962, "step": 5227, "task_loss": 2.1144495010375977 }, { "compression/movement_sparsity/importance_regularization_factor": 0.99270141291306, "compression/movement_sparsity/importance_threshold": -5.111736851461882e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.916755728274739, "compression/movement_sparsity/model_sparsity": 0.8852623487380051, "compression_loss": 105.14201354980469, "distillation_loss": 2.830380439758301, "epoch": 4.42, "learning_rate": 3.100403869634639e-05, "loss": 109.0362, "step": 5228, "task_loss": 1.2092900276184082 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9927331458558631, "compression/movement_sparsity/importance_threshold": -5.089511939818103e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9167049193964425, "compression/movement_sparsity/model_sparsity": 0.8852132853009846, "compression_loss": 105.14472198486328, "distillation_loss": 4.088048458099365, "epoch": 4.42, "learning_rate": 3.0999342537804076e-05, "loss": 108.6941, "step": 5229, "task_loss": 3.030275821685791 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9927647866856896, "compression/movement_sparsity/importance_threshold": -5.067351541660457e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.91675077974517, "compression/movement_sparsity/model_sparsity": 0.8852575702056504, "compression_loss": 105.14742279052734, "distillation_loss": 4.595587730407715, "epoch": 4.42, "learning_rate": 3.099464637926177e-05, "loss": 109.3805, "step": 5230, "task_loss": 1.858496069908142 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9927963355364243, "compression/movement_sparsity/importance_threshold": -5.0452555632195945e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.916702021823707, "compression/movement_sparsity/model_sparsity": 0.8852104872687865, "compression_loss": 105.15009307861328, "distillation_loss": 3.731786012649536, "epoch": 4.42, "learning_rate": 3.0989950220719455e-05, "loss": 109.4625, "step": 5231, "task_loss": 2.152480363845825 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9928277925419527, "compression/movement_sparsity/importance_threshold": -5.0232239107256445e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.916727956888315, "compression/movement_sparsity/model_sparsity": 0.8852355313841397, "compression_loss": 105.15281677246094, "distillation_loss": 3.6329870223999023, "epoch": 4.42, "learning_rate": 3.0985254062177135e-05, "loss": 108.9785, "step": 5232, "task_loss": 1.7407716512680054 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9928591578361597, "compression/movement_sparsity/importance_threshold": -5.001256490409171e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.916751864844425, "compression/movement_sparsity/model_sparsity": 0.8852586180284078, "compression_loss": 105.15546417236328, "distillation_loss": 4.558316230773926, "epoch": 4.42, "learning_rate": 3.098055790363483e-05, "loss": 108.7487, "step": 5233, "task_loss": 2.8748414516448975 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9928904315529306, "compression/movement_sparsity/importance_threshold": -4.9793532085006496e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9167828557561105, "compression/movement_sparsity/model_sparsity": 0.8852885443069377, "compression_loss": 105.15814971923828, "distillation_loss": 3.4313440322875977, "epoch": 4.42, "learning_rate": 3.0975861745092514e-05, "loss": 109.0161, "step": 5234, "task_loss": 2.4024136066436768 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9929216138261504, "compression/movement_sparsity/importance_threshold": -4.957513971230211e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9168376711547326, "compression/movement_sparsity/model_sparsity": 0.8853414766279852, "compression_loss": 105.1607666015625, "distillation_loss": 4.455592632293701, "epoch": 4.42, "learning_rate": 3.097116558655021e-05, "loss": 109.0204, "step": 5235, "task_loss": 2.1069235801696777 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9929527047897043, "compression/movement_sparsity/importance_threshold": -4.935738684828505e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9168179843539658, "compression/movement_sparsity/model_sparsity": 0.8853224661293885, "compression_loss": 105.16339874267578, "distillation_loss": 3.966323137283325, "epoch": 4.43, "learning_rate": 3.0966469428007894e-05, "loss": 109.1315, "step": 5236, "task_loss": 2.7347984313964844 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9929837045774774, "compression/movement_sparsity/importance_threshold": -4.9140272555258345e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9168137512744551, "compression/movement_sparsity/model_sparsity": 0.8853183784691814, "compression_loss": 105.1659927368164, "distillation_loss": 5.205983638763428, "epoch": 4.43, "learning_rate": 3.096177326946558e-05, "loss": 109.5408, "step": 5237, "task_loss": 2.3927104473114014 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9930146133233549, "compression/movement_sparsity/importance_threshold": -4.892379589552676e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9168113306684249, "compression/movement_sparsity/model_sparsity": 0.8853160410184151, "compression_loss": 105.1685791015625, "distillation_loss": 4.418153762817383, "epoch": 4.43, "learning_rate": 3.0957077110923267e-05, "loss": 109.0223, "step": 5238, "task_loss": 2.2882320880889893 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9930454311612219, "compression/movement_sparsity/importance_threshold": -4.8707955931393335e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9168251030820443, "compression/movement_sparsity/model_sparsity": 0.8853293403072577, "compression_loss": 105.17120361328125, "distillation_loss": 5.473730087280273, "epoch": 4.43, "learning_rate": 3.095238095238095e-05, "loss": 109.5473, "step": 5239, "task_loss": 3.9921133518218994 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9930761582249636, "compression/movement_sparsity/importance_threshold": -4.849275172516109e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9168887065922141, "compression/movement_sparsity/model_sparsity": 0.8853907588411859, "compression_loss": 105.17384338378906, "distillation_loss": 4.805716514587402, "epoch": 4.43, "learning_rate": 3.0947684793838646e-05, "loss": 108.8276, "step": 5240, "task_loss": 2.215390682220459 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9931067946484651, "compression/movement_sparsity/importance_threshold": -4.8278182339136534e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9169547426325815, "compression/movement_sparsity/model_sparsity": 0.885454526340416, "compression_loss": 105.17639923095703, "distillation_loss": 3.5674562454223633, "epoch": 4.43, "learning_rate": 3.0942988635296326e-05, "loss": 109.5402, "step": 5241, "task_loss": 2.259798526763916 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9931373405656115, "compression/movement_sparsity/importance_threshold": -4.806424683562183e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9169637096066436, "compression/movement_sparsity/model_sparsity": 0.8854631852713335, "compression_loss": 105.17903137207031, "distillation_loss": 3.3868844509124756, "epoch": 4.43, "learning_rate": 3.093829247675402e-05, "loss": 108.8692, "step": 5242, "task_loss": 2.981285572052002 }, { "compression/movement_sparsity/importance_regularization_factor": 0.993167796110288, "compression/movement_sparsity/importance_threshold": -4.785094427692087e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9170276708418424, "compression/movement_sparsity/model_sparsity": 0.8855249492413354, "compression_loss": 105.18157958984375, "distillation_loss": 4.184350490570068, "epoch": 4.43, "learning_rate": 3.0933596318211705e-05, "loss": 109.3318, "step": 5243, "task_loss": 1.5083893537521362 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9931981614163798, "compression/movement_sparsity/importance_threshold": -4.763827372533842e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9170235450798404, "compression/movement_sparsity/model_sparsity": 0.8855209652119506, "compression_loss": 105.18406677246094, "distillation_loss": 2.452932834625244, "epoch": 4.43, "learning_rate": 3.092890015966939e-05, "loss": 108.6115, "step": 5244, "task_loss": 1.7664517164230347 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9932284366177719, "compression/movement_sparsity/importance_threshold": -4.742623424317839e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9170272177234723, "compression/movement_sparsity/model_sparsity": 0.8855245116889753, "compression_loss": 105.18659973144531, "distillation_loss": 3.682135820388794, "epoch": 4.43, "learning_rate": 3.092420400112708e-05, "loss": 109.0627, "step": 5245, "task_loss": 2.246311664581299 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9932586218483495, "compression/movement_sparsity/importance_threshold": -4.721482489274554e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9170278735526922, "compression/movement_sparsity/model_sparsity": 0.8855251449884439, "compression_loss": 105.1890640258789, "distillation_loss": 3.267014741897583, "epoch": 4.43, "learning_rate": 3.0919507842584764e-05, "loss": 108.9767, "step": 5246, "task_loss": 1.7679287195205688 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9932887172419977, "compression/movement_sparsity/importance_threshold": -4.7004044736341163e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9170059450084099, "compression/movement_sparsity/model_sparsity": 0.8855039697571178, "compression_loss": 105.19158172607422, "distillation_loss": 4.145192623138428, "epoch": 4.44, "learning_rate": 3.091481168404246e-05, "loss": 108.6701, "step": 5247, "task_loss": 2.332385540008545 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9933187229326018, "compression/movement_sparsity/importance_threshold": -4.6793892836271764e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9170852884198589, "compression/movement_sparsity/model_sparsity": 0.8855805874782947, "compression_loss": 105.19409942626953, "distillation_loss": 4.399828910827637, "epoch": 4.44, "learning_rate": 3.0910115525500144e-05, "loss": 109.2916, "step": 5248, "task_loss": 2.642291307449341 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9933486390540468, "compression/movement_sparsity/importance_threshold": -4.658436825484037e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.917103949742209, "compression/movement_sparsity/model_sparsity": 0.8855986077268131, "compression_loss": 105.19660186767578, "distillation_loss": 5.788728713989258, "epoch": 4.44, "learning_rate": 3.090541936695783e-05, "loss": 109.7256, "step": 5249, "task_loss": 2.857025623321533 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9933784657402178, "compression/movement_sparsity/importance_threshold": -4.6375470054351754e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9170873513008599, "compression/movement_sparsity/model_sparsity": 0.8855825794929871, "compression_loss": 105.19908142089844, "distillation_loss": 3.5973081588745117, "epoch": 4.44, "learning_rate": 3.0900723208415516e-05, "loss": 109.228, "step": 5250, "task_loss": 2.441951274871826 }, { "compression/movement_sparsity/importance_regularization_factor": 0.993408203125, "compression/movement_sparsity/importance_threshold": -4.616719729710894e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9171206435768993, "compression/movement_sparsity/model_sparsity": 0.8856147280769254, "compression_loss": 105.2015609741211, "distillation_loss": 3.9575893878936768, "epoch": 4.44, "learning_rate": 3.08960270498732e-05, "loss": 109.0105, "step": 5251, "task_loss": 2.564823627471924 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9934378513422786, "compression/movement_sparsity/importance_threshold": -4.595954904541583e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9171166489807412, "compression/movement_sparsity/model_sparsity": 0.8856108707074343, "compression_loss": 105.2041015625, "distillation_loss": 4.515467166900635, "epoch": 4.44, "learning_rate": 3.0891330891330896e-05, "loss": 109.6784, "step": 5252, "task_loss": 1.8135656118392944 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9934674105259387, "compression/movement_sparsity/importance_threshold": -4.5752524361577195e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9171779392023895, "compression/movement_sparsity/model_sparsity": 0.8856700554214183, "compression_loss": 105.2065658569336, "distillation_loss": 4.74397087097168, "epoch": 4.44, "learning_rate": 3.088663473278858e-05, "loss": 109.1572, "step": 5253, "task_loss": 2.8507578372955322 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9934968808098653, "compression/movement_sparsity/importance_threshold": -4.5546122307896925e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9172048162762408, "compression/movement_sparsity/model_sparsity": 0.8856960091850992, "compression_loss": 105.20906066894531, "distillation_loss": 3.138256072998047, "epoch": 4.44, "learning_rate": 3.088193857424627e-05, "loss": 108.8121, "step": 5254, "task_loss": 2.1193490028381348 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9935262623279438, "compression/movement_sparsity/importance_threshold": -4.5340341946678055e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9172208781300463, "compression/movement_sparsity/model_sparsity": 0.8857115192648144, "compression_loss": 105.21150970458984, "distillation_loss": 5.062928199768066, "epoch": 4.44, "learning_rate": 3.0877242415703955e-05, "loss": 109.5122, "step": 5255, "task_loss": 2.8871235847473145 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9935555552140591, "compression/movement_sparsity/importance_threshold": -4.513518234022622e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9172792946272944, "compression/movement_sparsity/model_sparsity": 0.8857679289756718, "compression_loss": 105.21401977539062, "distillation_loss": 5.650177955627441, "epoch": 4.44, "learning_rate": 3.087254625716164e-05, "loss": 109.7726, "step": 5256, "task_loss": 2.3311243057250977 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9935847596020966, "compression/movement_sparsity/importance_threshold": -4.493064255084358e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9173170942387001, "compression/movement_sparsity/model_sparsity": 0.8858044300541406, "compression_loss": 105.21644592285156, "distillation_loss": 5.053370475769043, "epoch": 4.44, "learning_rate": 3.0867850098619334e-05, "loss": 109.4565, "step": 5257, "task_loss": 2.589144706726074 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9936138756259413, "compression/movement_sparsity/importance_threshold": -4.4726721640834036e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9173144589976525, "compression/movement_sparsity/model_sparsity": 0.88580188534173, "compression_loss": 105.21892547607422, "distillation_loss": 5.526743412017822, "epoch": 4.44, "learning_rate": 3.0863153940077014e-05, "loss": 109.1872, "step": 5258, "task_loss": 2.6437430381774902 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9936429034194781, "compression/movement_sparsity/importance_threshold": -4.452341867250409e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.917342409246591, "compression/movement_sparsity/model_sparsity": 0.8858288754136323, "compression_loss": 105.22139739990234, "distillation_loss": 4.86757755279541, "epoch": 4.45, "learning_rate": 3.085845778153471e-05, "loss": 109.1638, "step": 5259, "task_loss": 2.7847962379455566 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9936718431165926, "compression/movement_sparsity/importance_threshold": -4.4320732708155036e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9173501003347162, "compression/movement_sparsity/model_sparsity": 0.8858363022892198, "compression_loss": 105.22379302978516, "distillation_loss": 4.006549835205078, "epoch": 4.45, "learning_rate": 3.085376162299239e-05, "loss": 109.687, "step": 5260, "task_loss": 2.1678307056427 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9937006948511696, "compression/movement_sparsity/importance_threshold": -4.411866281009251e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9173517220215146, "compression/movement_sparsity/model_sparsity": 0.8858378682660879, "compression_loss": 105.22624206542969, "distillation_loss": 5.853924751281738, "epoch": 4.45, "learning_rate": 3.0849065464450086e-05, "loss": 109.7367, "step": 5261, "task_loss": 3.138742446899414 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9937294587570944, "compression/movement_sparsity/importance_threshold": -4.391720804061954e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9174298133953619, "compression/movement_sparsity/model_sparsity": 0.8859132769610063, "compression_loss": 105.22869873046875, "distillation_loss": 2.8816800117492676, "epoch": 4.45, "learning_rate": 3.0844369305907766e-05, "loss": 108.588, "step": 5262, "task_loss": 2.378624200820923 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9937581349682522, "compression/movement_sparsity/importance_threshold": -4.371636746204003e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9174160409817425, "compression/movement_sparsity/model_sparsity": 0.8858999776721637, "compression_loss": 105.23108673095703, "distillation_loss": 4.33465576171875, "epoch": 4.45, "learning_rate": 3.083967314736545e-05, "loss": 108.9621, "step": 5263, "task_loss": 2.159965991973877 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9937867236185278, "compression/movement_sparsity/importance_threshold": -4.351614013665961e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9174656216707724, "compression/movement_sparsity/model_sparsity": 0.8859478551119974, "compression_loss": 105.23346710205078, "distillation_loss": 3.8796310424804688, "epoch": 4.45, "learning_rate": 3.0834976988823145e-05, "loss": 109.4398, "step": 5264, "task_loss": 2.3852086067199707 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9938152248418067, "compression/movement_sparsity/importance_threshold": -4.3316525126779575e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9174593376344283, "compression/movement_sparsity/model_sparsity": 0.8859417869516336, "compression_loss": 105.23591613769531, "distillation_loss": 5.581107139587402, "epoch": 4.45, "learning_rate": 3.083028083028083e-05, "loss": 109.9642, "step": 5265, "task_loss": 3.0037808418273926 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9938436387719739, "compression/movement_sparsity/importance_threshold": -4.311752149470643e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9175175752691619, "compression/movement_sparsity/model_sparsity": 0.8859980239444541, "compression_loss": 105.23829650878906, "distillation_loss": 4.918869495391846, "epoch": 4.45, "learning_rate": 3.082558467173852e-05, "loss": 109.7805, "step": 5266, "task_loss": 2.8536181449890137 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9938719655429147, "compression/movement_sparsity/importance_threshold": -4.291912830274233e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9175291178634334, "compression/movement_sparsity/model_sparsity": 0.8860091700151033, "compression_loss": 105.24070739746094, "distillation_loss": 4.777923583984375, "epoch": 4.45, "learning_rate": 3.0820888513196204e-05, "loss": 109.757, "step": 5267, "task_loss": 3.8504798412323 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9939002052885139, "compression/movement_sparsity/importance_threshold": -4.272134461319292e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9175766833681328, "compression/movement_sparsity/model_sparsity": 0.8860551014983877, "compression_loss": 105.24308776855469, "distillation_loss": 5.100000381469727, "epoch": 4.45, "learning_rate": 3.08161923546539e-05, "loss": 109.7757, "step": 5268, "task_loss": 3.0352768898010254 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9939283581426569, "compression/movement_sparsity/importance_threshold": -4.252416948836036e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9176074357964658, "compression/movement_sparsity/model_sparsity": 0.8860847974862018, "compression_loss": 105.24553680419922, "distillation_loss": 6.948433876037598, "epoch": 4.45, "learning_rate": 3.0811496196111584e-05, "loss": 110.2996, "step": 5269, "task_loss": 3.181124210357666 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9939564242392288, "compression/movement_sparsity/importance_threshold": -4.232760199054941e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.917685002506937, "compression/movement_sparsity/model_sparsity": 0.8861596995415453, "compression_loss": 105.24791717529297, "distillation_loss": 4.228874206542969, "epoch": 4.45, "learning_rate": 3.080680003756927e-05, "loss": 109.5546, "step": 5270, "task_loss": 1.6805094480514526 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9939844037121147, "compression/movement_sparsity/importance_threshold": -4.213164118206485e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9176515313683832, "compression/movement_sparsity/model_sparsity": 0.88612737823957, "compression_loss": 105.25028991699219, "distillation_loss": 2.8455381393432617, "epoch": 4.46, "learning_rate": 3.0802103879026956e-05, "loss": 108.9509, "step": 5271, "task_loss": 2.339909076690674 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9940122966951997, "compression/movement_sparsity/importance_threshold": -4.193628612520969e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9176898675673324, "compression/movement_sparsity/model_sparsity": 0.8861643974721495, "compression_loss": 105.25262451171875, "distillation_loss": 3.362929582595825, "epoch": 4.46, "learning_rate": 3.079740772048464e-05, "loss": 109.5408, "step": 5272, "task_loss": 2.3124594688415527 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9940401033223691, "compression/movement_sparsity/importance_threshold": -4.174153588228785e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9176952811394391, "compression/movement_sparsity/model_sparsity": 0.8861696250714002, "compression_loss": 105.2550048828125, "distillation_loss": 5.497345924377441, "epoch": 4.46, "learning_rate": 3.0792711561942336e-05, "loss": 109.056, "step": 5273, "task_loss": 3.7860612869262695 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9940678237275079, "compression/movement_sparsity/importance_threshold": -4.154738951560495e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9176756420353428, "compression/movement_sparsity/model_sparsity": 0.8861506606309466, "compression_loss": 105.25736999511719, "distillation_loss": 3.5265634059906006, "epoch": 4.46, "learning_rate": 3.078801540340002e-05, "loss": 108.8464, "step": 5274, "task_loss": 2.1905648708343506 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9940954580445013, "compression/movement_sparsity/importance_threshold": -4.1353846087462294e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9177010047399043, "compression/movement_sparsity/model_sparsity": 0.8861751520485815, "compression_loss": 105.25968170166016, "distillation_loss": 4.278606414794922, "epoch": 4.46, "learning_rate": 3.078331924485771e-05, "loss": 108.987, "step": 5275, "task_loss": 2.090707302093506 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9941230064072344, "compression/movement_sparsity/importance_threshold": -4.116090466016638e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9177302427989474, "compression/movement_sparsity/model_sparsity": 0.8862033856903498, "compression_loss": 105.26197052001953, "distillation_loss": 3.99467396736145, "epoch": 4.46, "learning_rate": 3.0778623086315395e-05, "loss": 109.8677, "step": 5276, "task_loss": 2.405158281326294 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9941504689495924, "compression/movement_sparsity/importance_threshold": -4.096856429601937e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9177634396816458, "compression/movement_sparsity/model_sparsity": 0.8862354421580018, "compression_loss": 105.26428985595703, "distillation_loss": 3.988898754119873, "epoch": 4.46, "learning_rate": 3.077392692777308e-05, "loss": 108.9139, "step": 5277, "task_loss": 2.250352621078491 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9941778458054604, "compression/movement_sparsity/importance_threshold": -4.077682405732516e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9177769259152418, "compression/movement_sparsity/model_sparsity": 0.8862484650979854, "compression_loss": 105.26655578613281, "distillation_loss": 3.4501352310180664, "epoch": 4.46, "learning_rate": 3.0769230769230774e-05, "loss": 108.8478, "step": 5278, "task_loss": 1.312605619430542 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9942051371087235, "compression/movement_sparsity/importance_threshold": -4.058568300638939e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9178341380715588, "compression/movement_sparsity/model_sparsity": 0.8863037118407276, "compression_loss": 105.26884460449219, "distillation_loss": 4.382536888122559, "epoch": 4.46, "learning_rate": 3.0764534610688454e-05, "loss": 109.1438, "step": 5279, "task_loss": 2.3028433322906494 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9942323429932669, "compression/movement_sparsity/importance_threshold": -4.0395140205515094e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9178003569046463, "compression/movement_sparsity/model_sparsity": 0.8862710911608217, "compression_loss": 105.27108764648438, "distillation_loss": 5.245271682739258, "epoch": 4.46, "learning_rate": 3.075983845214615e-05, "loss": 108.9027, "step": 5280, "task_loss": 2.6318256855010986 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9942594635929758, "compression/movement_sparsity/importance_threshold": -4.0205194717005295e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9178585706910446, "compression/movement_sparsity/model_sparsity": 0.8863273051245706, "compression_loss": 105.27335357666016, "distillation_loss": 4.9941725730896, "epoch": 4.46, "learning_rate": 3.075514229360383e-05, "loss": 109.4201, "step": 5281, "task_loss": 2.400455951690674 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9942864990417352, "compression/movement_sparsity/importance_threshold": -4.00158456031665e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9178685631435235, "compression/movement_sparsity/model_sparsity": 0.8863369543055665, "compression_loss": 105.27555084228516, "distillation_loss": 5.163814067840576, "epoch": 4.46, "learning_rate": 3.075044613506152e-05, "loss": 109.0704, "step": 5282, "task_loss": 3.985599994659424 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9943134494734303, "compression/movement_sparsity/importance_threshold": -3.98270919263e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9178773035584006, "compression/movement_sparsity/model_sparsity": 0.8863453944603039, "compression_loss": 105.27781677246094, "distillation_loss": 3.9177510738372803, "epoch": 4.47, "learning_rate": 3.074574997651921e-05, "loss": 109.3048, "step": 5283, "task_loss": 2.3960378170013428 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9943403150219462, "compression/movement_sparsity/importance_threshold": -3.963893274871143e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9178436297089969, "compression/movement_sparsity/model_sparsity": 0.8863128774112201, "compression_loss": 105.280029296875, "distillation_loss": 5.306855201721191, "epoch": 4.47, "learning_rate": 3.074105381797689e-05, "loss": 110.1449, "step": 5284, "task_loss": 2.90151047706604 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9943670958211681, "compression/movement_sparsity/importance_threshold": -3.945136713270469e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9178832656422184, "compression/movement_sparsity/model_sparsity": 0.8863511517282011, "compression_loss": 105.28221893310547, "distillation_loss": 3.8354897499084473, "epoch": 4.47, "learning_rate": 3.0736357659434586e-05, "loss": 109.723, "step": 5285, "task_loss": 2.0803427696228027 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9943937920049812, "compression/movement_sparsity/importance_threshold": -3.926439414058281e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.917800368828814, "compression/movement_sparsity/model_sparsity": 0.8862711026753575, "compression_loss": 105.28441619873047, "distillation_loss": 3.583292007446289, "epoch": 4.47, "learning_rate": 3.073166150089227e-05, "loss": 109.135, "step": 5286, "task_loss": 2.5634682178497314 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9944204037072706, "compression/movement_sparsity/importance_threshold": -3.907801283464969e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9177729790157544, "compression/movement_sparsity/model_sparsity": 0.8862446537866374, "compression_loss": 105.2865982055664, "distillation_loss": 3.181723117828369, "epoch": 4.47, "learning_rate": 3.072696534234996e-05, "loss": 109.4952, "step": 5287, "task_loss": 1.8665926456451416 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9944469310619213, "compression/movement_sparsity/importance_threshold": -3.8892222277211834e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.917788873931213, "compression/movement_sparsity/model_sparsity": 0.8862600026628515, "compression_loss": 105.2886962890625, "distillation_loss": 3.6482009887695312, "epoch": 4.47, "learning_rate": 3.0722269183807645e-05, "loss": 109.0572, "step": 5288, "task_loss": 2.057821035385132 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9944733742028186, "compression/movement_sparsity/importance_threshold": -3.8707021530569666e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9178495440961443, "compression/movement_sparsity/model_sparsity": 0.8863185886209741, "compression_loss": 105.29085540771484, "distillation_loss": 4.0911545753479, "epoch": 4.47, "learning_rate": 3.071757302526533e-05, "loss": 109.0379, "step": 5289, "task_loss": 2.0330793857574463 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9944997332638476, "compression/movement_sparsity/importance_threshold": -3.852240965702969e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9179039183005638, "compression/movement_sparsity/model_sparsity": 0.8863710949041973, "compression_loss": 105.29296875, "distillation_loss": 4.281589031219482, "epoch": 4.47, "learning_rate": 3.0712876866723024e-05, "loss": 109.2861, "step": 5290, "task_loss": 1.6291879415512085 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9945260083788934, "compression/movement_sparsity/importance_threshold": -3.8338385718894065e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9180227426310552, "compression/movement_sparsity/model_sparsity": 0.8864858372533899, "compression_loss": 105.29511260986328, "distillation_loss": 3.5341243743896484, "epoch": 4.47, "learning_rate": 3.070818070818071e-05, "loss": 110.1602, "step": 5291, "task_loss": 2.307964324951172 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9945521996818412, "compression/movement_sparsity/importance_threshold": -3.815494877846843e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9180179848881684, "compression/movement_sparsity/model_sparsity": 0.8864812429536079, "compression_loss": 105.29718017578125, "distillation_loss": 3.4971156120300293, "epoch": 4.47, "learning_rate": 3.07034845496384e-05, "loss": 109.9427, "step": 5292, "task_loss": 2.1439735889434814 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9945783073065761, "compression/movement_sparsity/importance_threshold": -3.7972097898054946e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9180805271474186, "compression/movement_sparsity/model_sparsity": 0.8865416366938502, "compression_loss": 105.29926300048828, "distillation_loss": 3.723022222518921, "epoch": 4.47, "learning_rate": 3.069878839109608e-05, "loss": 108.9296, "step": 5293, "task_loss": 1.5795183181762695 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9946043313869832, "compression/movement_sparsity/importance_threshold": -3.7789832139960115e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.918141912762408, "compression/movement_sparsity/model_sparsity": 0.8866009135241205, "compression_loss": 105.30138397216797, "distillation_loss": 5.6574530601501465, "epoch": 4.47, "learning_rate": 3.0694092232553776e-05, "loss": 110.1239, "step": 5294, "task_loss": 2.6224288940429688 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9946302720569479, "compression/movement_sparsity/importance_threshold": -3.7608150566485234e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9182028333348596, "compression/movement_sparsity/model_sparsity": 0.8866597412874948, "compression_loss": 105.30339050292969, "distillation_loss": 3.7984118461608887, "epoch": 4.48, "learning_rate": 3.068939607401146e-05, "loss": 109.015, "step": 5295, "task_loss": 2.6459646224975586 }, { "compression/movement_sparsity/importance_regularization_factor": 0.994656129450355, "compression/movement_sparsity/importance_threshold": -3.7427052239935935e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9182601170361823, "compression/movement_sparsity/model_sparsity": 0.8867150571174518, "compression_loss": 105.30545806884766, "distillation_loss": 4.660604953765869, "epoch": 4.48, "learning_rate": 3.068469991546914e-05, "loss": 109.3836, "step": 5296, "task_loss": 2.5265064239501953 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9946819037010897, "compression/movement_sparsity/importance_threshold": -3.7246536222616984e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9183058819915687, "compression/movement_sparsity/model_sparsity": 0.8867592499058312, "compression_loss": 105.30752563476562, "distillation_loss": 4.230428218841553, "epoch": 4.48, "learning_rate": 3.0680003756926835e-05, "loss": 109.076, "step": 5297, "task_loss": 2.614109992980957 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9947075949430373, "compression/movement_sparsity/importance_threshold": -3.706660157682968e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9183171622541523, "compression/movement_sparsity/model_sparsity": 0.8867701426566929, "compression_loss": 105.30957794189453, "distillation_loss": 3.8527121543884277, "epoch": 4.48, "learning_rate": 3.067530759838452e-05, "loss": 109.2545, "step": 5298, "task_loss": 2.9995381832122803 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9947332033100829, "compression/movement_sparsity/importance_threshold": -3.688724736487965e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9183194755426737, "compression/movement_sparsity/model_sparsity": 0.886772376476637, "compression_loss": 105.31165313720703, "distillation_loss": 3.8960328102111816, "epoch": 4.48, "learning_rate": 3.0670611439842215e-05, "loss": 109.3, "step": 5299, "task_loss": 2.283010244369507 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9947587289361115, "compression/movement_sparsity/importance_threshold": -3.67084726490708e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9184033024411538, "compression/movement_sparsity/model_sparsity": 0.8868533236632726, "compression_loss": 105.31376647949219, "distillation_loss": 4.463361740112305, "epoch": 4.48, "learning_rate": 3.06659152812999e-05, "loss": 108.9276, "step": 5300, "task_loss": 1.5154794454574585 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9947841719550083, "compression/movement_sparsity/importance_threshold": -3.65302764917079e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9185040735818444, "compression/movement_sparsity/model_sparsity": 0.8869506330052723, "compression_loss": 105.3158187866211, "distillation_loss": 4.496820449829102, "epoch": 4.48, "learning_rate": 3.066121912275759e-05, "loss": 109.5484, "step": 5301, "task_loss": 3.04172420501709 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9948095325006586, "compression/movement_sparsity/importance_threshold": -3.63526579550931e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9184917916891795, "compression/movement_sparsity/model_sparsity": 0.886938773033404, "compression_loss": 105.3178939819336, "distillation_loss": 4.582427978515625, "epoch": 4.48, "learning_rate": 3.0656522964215274e-05, "loss": 109.8135, "step": 5302, "task_loss": 2.1595518589019775 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9948348107069473, "compression/movement_sparsity/importance_threshold": -3.617561610153204e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9184646403594726, "compression/movement_sparsity/model_sparsity": 0.8869125544353997, "compression_loss": 105.31996154785156, "distillation_loss": 4.720130920410156, "epoch": 4.48, "learning_rate": 3.065182680567296e-05, "loss": 109.5574, "step": 5303, "task_loss": 3.4044392108917236 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9948600067077598, "compression/movement_sparsity/importance_threshold": -3.5999149993327755e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9185295674522499, "compression/movement_sparsity/model_sparsity": 0.8869752510828011, "compression_loss": 105.32205963134766, "distillation_loss": 5.022746562957764, "epoch": 4.48, "learning_rate": 3.064713064713065e-05, "loss": 110.2888, "step": 5304, "task_loss": 2.9065051078796387 }, { "compression/movement_sparsity/importance_regularization_factor": 0.994885120636981, "compression/movement_sparsity/importance_threshold": -3.582325869278414e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9185984891411852, "compression/movement_sparsity/model_sparsity": 0.8870418050996934, "compression_loss": 105.32415771484375, "distillation_loss": 4.093623161315918, "epoch": 4.48, "learning_rate": 3.064243448858833e-05, "loss": 108.9882, "step": 5305, "task_loss": 2.3920021057128906 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9949101526284962, "compression/movement_sparsity/importance_threshold": -3.5647941262205093e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9185527003374635, "compression/movement_sparsity/model_sparsity": 0.8869975892822425, "compression_loss": 105.32622528076172, "distillation_loss": 3.9799437522888184, "epoch": 4.48, "learning_rate": 3.0637738330046026e-05, "loss": 109.1541, "step": 5306, "task_loss": 2.2192273139953613 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9949351028161905, "compression/movement_sparsity/importance_threshold": -3.547319676389538e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9186018159839556, "compression/movement_sparsity/model_sparsity": 0.8870450176551802, "compression_loss": 105.3282699584961, "distillation_loss": 4.50498104095459, "epoch": 4.49, "learning_rate": 3.063304217150371e-05, "loss": 109.4839, "step": 5307, "task_loss": 2.276562213897705 }, { "compression/movement_sparsity/importance_regularization_factor": 0.994959971333949, "compression/movement_sparsity/importance_threshold": -3.5299024260158907e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.918594172592501, "compression/movement_sparsity/model_sparsity": 0.8870376368377358, "compression_loss": 105.33024597167969, "distillation_loss": 3.230168342590332, "epoch": 4.49, "learning_rate": 3.06283460129614e-05, "loss": 109.4122, "step": 5308, "task_loss": 1.8821237087249756 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9949847583156568, "compression/movement_sparsity/importance_threshold": -3.5125422813298696e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9186003493113364, "compression/movement_sparsity/model_sparsity": 0.8870436013672774, "compression_loss": 105.3322525024414, "distillation_loss": 4.47476863861084, "epoch": 4.49, "learning_rate": 3.0623649854419085e-05, "loss": 110.2725, "step": 5309, "task_loss": 3.0541789531707764 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9950094638951992, "compression/movement_sparsity/importance_threshold": -3.4952391485619516e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9185686667979279, "compression/movement_sparsity/model_sparsity": 0.8870130072456713, "compression_loss": 105.3342514038086, "distillation_loss": 4.209713935852051, "epoch": 4.49, "learning_rate": 3.061895369587677e-05, "loss": 109.6739, "step": 5310, "task_loss": 2.536536455154419 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9950340882064611, "compression/movement_sparsity/importance_threshold": -3.4779929339426134e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9185996338612782, "compression/movement_sparsity/model_sparsity": 0.8870429104951297, "compression_loss": 105.33627319335938, "distillation_loss": 4.76332426071167, "epoch": 4.49, "learning_rate": 3.0614257537334464e-05, "loss": 109.6265, "step": 5311, "task_loss": 2.166757583618164 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9950586313833278, "compression/movement_sparsity/importance_threshold": -3.460803543702158e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9186276914277254, "compression/movement_sparsity/model_sparsity": 0.8870700041978543, "compression_loss": 105.3382339477539, "distillation_loss": 4.837841033935547, "epoch": 4.49, "learning_rate": 3.060956137879215e-05, "loss": 109.2596, "step": 5312, "task_loss": 1.9219720363616943 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9950830935596846, "compression/movement_sparsity/importance_threshold": -3.443670884070802e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9186098409487745, "compression/movement_sparsity/model_sparsity": 0.8870527669377699, "compression_loss": 105.3401870727539, "distillation_loss": 4.139411449432373, "epoch": 4.49, "learning_rate": 3.060486522024984e-05, "loss": 109.3898, "step": 5313, "task_loss": 1.2577857971191406 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9951074748694163, "compression/movement_sparsity/importance_threshold": -3.426594861279282e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9185456412302231, "compression/movement_sparsity/model_sparsity": 0.8869907726770521, "compression_loss": 105.34212493896484, "distillation_loss": 4.207692623138428, "epoch": 4.49, "learning_rate": 3.060016906170752e-05, "loss": 109.8421, "step": 5314, "task_loss": 2.4605307579040527 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9951317754464083, "compression/movement_sparsity/importance_threshold": -3.4095753815578146e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9186028772348752, "compression/movement_sparsity/model_sparsity": 0.8870460424488659, "compression_loss": 105.34404754638672, "distillation_loss": 4.348567485809326, "epoch": 4.49, "learning_rate": 3.059547290316521e-05, "loss": 110.3525, "step": 5315, "task_loss": 2.480313777923584 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9951559954245457, "compression/movement_sparsity/importance_threshold": -3.3926123511367895e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9185856468126413, "compression/movement_sparsity/model_sparsity": 0.8870294039446428, "compression_loss": 105.34600067138672, "distillation_loss": 4.381779670715332, "epoch": 4.49, "learning_rate": 3.05907767446229e-05, "loss": 109.9789, "step": 5316, "task_loss": 2.176074981689453 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9951801349377135, "compression/movement_sparsity/importance_threshold": -3.3757056762465966e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9186402237279108, "compression/movement_sparsity/model_sparsity": 0.8870821059749744, "compression_loss": 105.34786987304688, "distillation_loss": 3.0911898612976074, "epoch": 4.49, "learning_rate": 3.058608058608059e-05, "loss": 109.0962, "step": 5317, "task_loss": 2.878769874572754 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9952041941197969, "compression/movement_sparsity/importance_threshold": -3.358855263117799e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9185957823551318, "compression/movement_sparsity/model_sparsity": 0.8870391913000681, "compression_loss": 105.34982299804688, "distillation_loss": 4.0792717933654785, "epoch": 4.5, "learning_rate": 3.0581384427538275e-05, "loss": 109.5954, "step": 5318, "task_loss": 2.5904035568237305 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9952281731046811, "compression/movement_sparsity/importance_threshold": -3.342061017980527e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9185010209949297, "compression/movement_sparsity/model_sparsity": 0.8869476852841089, "compression_loss": 105.35173797607422, "distillation_loss": 6.36912727355957, "epoch": 4.5, "learning_rate": 3.057668826899596e-05, "loss": 109.8856, "step": 5319, "task_loss": 3.078721284866333 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9952520720262512, "compression/movement_sparsity/importance_threshold": -3.325322847065517e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9185061960836837, "compression/movement_sparsity/model_sparsity": 0.8869526825926437, "compression_loss": 105.35358428955078, "distillation_loss": 3.458751678466797, "epoch": 4.5, "learning_rate": 3.057199211045365e-05, "loss": 109.4872, "step": 5320, "task_loss": 2.1588242053985596 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9952758910183924, "compression/movement_sparsity/importance_threshold": -3.308640656602812e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.918523700761773, "compression/movement_sparsity/model_sparsity": 0.8869695859311901, "compression_loss": 105.35547637939453, "distillation_loss": 3.0859713554382324, "epoch": 4.5, "learning_rate": 3.056729595191134e-05, "loss": 109.0976, "step": 5321, "task_loss": 2.262476921081543 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9952996302149898, "compression/movement_sparsity/importance_threshold": -3.292014352822975e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9185321072999564, "compression/movement_sparsity/model_sparsity": 0.8869777036789253, "compression_loss": 105.35736083984375, "distillation_loss": 5.503211498260498, "epoch": 4.5, "learning_rate": 3.056259979336902e-05, "loss": 109.3787, "step": 5322, "task_loss": 1.9152984619140625 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9953232897499285, "compression/movement_sparsity/importance_threshold": -3.275443841956483e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9185563133602571, "compression/movement_sparsity/model_sparsity": 0.8870010781865882, "compression_loss": 105.35924530029297, "distillation_loss": 3.88155198097229, "epoch": 4.5, "learning_rate": 3.0557903634826714e-05, "loss": 108.9887, "step": 5323, "task_loss": 1.7157261371612549 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9953468697570936, "compression/movement_sparsity/importance_threshold": -3.2589290302335525e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9185710277831198, "compression/movement_sparsity/model_sparsity": 0.8870152871237587, "compression_loss": 105.36111450195312, "distillation_loss": 4.435579776763916, "epoch": 4.5, "learning_rate": 3.05532074762844e-05, "loss": 109.7016, "step": 5324, "task_loss": 1.9408481121063232 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9953703703703703, "compression/movement_sparsity/importance_threshold": -3.2424698238847464e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9186168165868415, "compression/movement_sparsity/model_sparsity": 0.8870595029412097, "compression_loss": 105.3630142211914, "distillation_loss": 3.593562364578247, "epoch": 4.5, "learning_rate": 3.054851131774209e-05, "loss": 109.1611, "step": 5325, "task_loss": 1.881563425064087 }, { "compression/movement_sparsity/importance_regularization_factor": 0.995393791723644, "compression/movement_sparsity/importance_threshold": -3.226066129140368e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9186343331890986, "compression/movement_sparsity/model_sparsity": 0.8870764177942919, "compression_loss": 105.36489868164062, "distillation_loss": 3.640774965286255, "epoch": 4.5, "learning_rate": 3.054381515919977e-05, "loss": 109.4842, "step": 5326, "task_loss": 2.349426746368408 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9954171339507993, "compression/movement_sparsity/importance_threshold": -3.2097178522308076e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9186680905076757, "compression/movement_sparsity/model_sparsity": 0.8871090154451262, "compression_loss": 105.36671447753906, "distillation_loss": 3.9799606800079346, "epoch": 4.5, "learning_rate": 3.053911900065746e-05, "loss": 109.231, "step": 5327, "task_loss": 1.6338565349578857 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9954403971857217, "compression/movement_sparsity/importance_threshold": -3.193424899386628e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9186610314004353, "compression/movement_sparsity/model_sparsity": 0.8871021988399358, "compression_loss": 105.36857604980469, "distillation_loss": 5.128190040588379, "epoch": 4.5, "learning_rate": 3.053442284211515e-05, "loss": 109.4388, "step": 5328, "task_loss": 2.8053669929504395 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9954635815622964, "compression/movement_sparsity/importance_threshold": -3.177187176837959e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9187238717638763, "compression/movement_sparsity/model_sparsity": 0.887162880443573, "compression_loss": 105.37039947509766, "distillation_loss": 2.9506044387817383, "epoch": 4.5, "learning_rate": 3.052972668357284e-05, "loss": 109.2275, "step": 5329, "task_loss": 1.4456278085708618 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9954866872144084, "compression/movement_sparsity/importance_threshold": -3.161004590815364e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9187905040126255, "compression/movement_sparsity/model_sparsity": 0.887227223669593, "compression_loss": 105.37216186523438, "distillation_loss": 5.158373832702637, "epoch": 4.51, "learning_rate": 3.052503052503053e-05, "loss": 109.7285, "step": 5330, "task_loss": 2.8485584259033203 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9955097142759428, "compression/movement_sparsity/importance_threshold": -3.144877047549233e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9188355773662891, "compression/movement_sparsity/model_sparsity": 0.8872707486148963, "compression_loss": 105.37397003173828, "distillation_loss": 5.424570083618164, "epoch": 4.51, "learning_rate": 3.052033436648821e-05, "loss": 110.2662, "step": 5331, "task_loss": 3.008532762527466 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9955326628807848, "compression/movement_sparsity/importance_threshold": -3.1288044532698685e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9187537895004747, "compression/movement_sparsity/model_sparsity": 0.8871917704138815, "compression_loss": 105.3757553100586, "distillation_loss": 5.057656764984131, "epoch": 4.51, "learning_rate": 3.05156382079459e-05, "loss": 109.6675, "step": 5332, "task_loss": 2.7585439682006836 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9955555331628195, "compression/movement_sparsity/importance_threshold": -3.1127867142078346e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9188274570081291, "compression/movement_sparsity/model_sparsity": 0.8872629072160203, "compression_loss": 105.37752532958984, "distillation_loss": 3.6905813217163086, "epoch": 4.51, "learning_rate": 3.051094204940359e-05, "loss": 108.8345, "step": 5333, "task_loss": 2.8215889930725098 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9955783252559322, "compression/movement_sparsity/importance_threshold": -3.096823736593434e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9188260141838451, "compression/movement_sparsity/model_sparsity": 0.8872615139571891, "compression_loss": 105.37918090820312, "distillation_loss": 3.648729085922241, "epoch": 4.51, "learning_rate": 3.050624589086128e-05, "loss": 109.232, "step": 5334, "task_loss": 2.2307941913604736 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9956010392940078, "compression/movement_sparsity/importance_threshold": -3.080915426657057e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.91883083154757, "compression/movement_sparsity/model_sparsity": 0.8872661658296501, "compression_loss": 105.38093566894531, "distillation_loss": 5.115367889404297, "epoch": 4.51, "learning_rate": 3.0501549732318963e-05, "loss": 109.2095, "step": 5335, "task_loss": 3.3497884273529053 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9956236754109316, "compression/movement_sparsity/importance_threshold": -3.065061690629093e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9189094475847932, "compression/movement_sparsity/model_sparsity": 0.8873420811641435, "compression_loss": 105.38265228271484, "distillation_loss": 2.999159812927246, "epoch": 4.51, "learning_rate": 3.049685357377665e-05, "loss": 109.4159, "step": 5336, "task_loss": 1.4671930074691772 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9956462337405887, "compression/movement_sparsity/importance_threshold": -3.049262434739932e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9189302791056531, "compression/movement_sparsity/model_sparsity": 0.8873621970581765, "compression_loss": 105.38439178466797, "distillation_loss": 3.26348876953125, "epoch": 4.51, "learning_rate": 3.049215741523434e-05, "loss": 109.7191, "step": 5337, "task_loss": 1.5933661460876465 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9956687144168642, "compression/movement_sparsity/importance_threshold": -3.0335175652200508e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9189319723374574, "compression/movement_sparsity/model_sparsity": 0.8873638321222594, "compression_loss": 105.38603973388672, "distillation_loss": 3.244957685470581, "epoch": 4.51, "learning_rate": 3.048746125669203e-05, "loss": 109.095, "step": 5338, "task_loss": 3.0963120460510254 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9956911175736434, "compression/movement_sparsity/importance_threshold": -3.0178269882997523e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9189316146124283, "compression/movement_sparsity/model_sparsity": 0.8873634866861856, "compression_loss": 105.38776397705078, "distillation_loss": 5.366868019104004, "epoch": 4.51, "learning_rate": 3.0482765098149712e-05, "loss": 109.9555, "step": 5339, "task_loss": 2.662236452102661 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9957134433448112, "compression/movement_sparsity/importance_threshold": -3.0021906102096e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9189896137638092, "compression/movement_sparsity/model_sparsity": 0.8874194933882902, "compression_loss": 105.38941955566406, "distillation_loss": 6.266535758972168, "epoch": 4.51, "learning_rate": 3.0478068939607402e-05, "loss": 109.7656, "step": 5340, "task_loss": 3.578381299972534 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9957356918642528, "compression/movement_sparsity/importance_threshold": -2.98660833717981e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.919012281606485, "compression/movement_sparsity/model_sparsity": 0.8874413825208356, "compression_loss": 105.39115905761719, "distillation_loss": 4.859249114990234, "epoch": 4.51, "learning_rate": 3.047337278106509e-05, "loss": 109.5688, "step": 5341, "task_loss": 2.8670389652252197 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9957578632658536, "compression/movement_sparsity/importance_threshold": -2.9710800754407726e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9189787389229253, "compression/movement_sparsity/model_sparsity": 0.8874089921316456, "compression_loss": 105.39287567138672, "distillation_loss": 7.30093240737915, "epoch": 4.52, "learning_rate": 3.0468676622522778e-05, "loss": 110.5444, "step": 5342, "task_loss": 3.6034481525421143 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9957799576834985, "compression/movement_sparsity/importance_threshold": -2.955605731222964e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9190306329004765, "compression/movement_sparsity/model_sparsity": 0.8874591033914234, "compression_loss": 105.3946304321289, "distillation_loss": 3.868135929107666, "epoch": 4.52, "learning_rate": 3.0463980463980464e-05, "loss": 109.5081, "step": 5343, "task_loss": 2.2713162899017334 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9958019752510725, "compression/movement_sparsity/importance_threshold": -2.9401852107568613e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9190498665828731, "compression/movement_sparsity/model_sparsity": 0.8874776763376601, "compression_loss": 105.39637756347656, "distillation_loss": 3.9130611419677734, "epoch": 4.52, "learning_rate": 3.045928430543815e-05, "loss": 109.5001, "step": 5344, "task_loss": 2.5347094535827637 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9958239161024611, "compression/movement_sparsity/importance_threshold": -2.9248184202726804e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9191046104364894, "compression/movement_sparsity/model_sparsity": 0.8875305395714927, "compression_loss": 105.39803314208984, "distillation_loss": 4.526621341705322, "epoch": 4.52, "learning_rate": 3.045458814689584e-05, "loss": 109.7642, "step": 5345, "task_loss": 3.4054384231567383 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9958457803715492, "compression/movement_sparsity/importance_threshold": -2.909505266000898e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9191156879882231, "compression/movement_sparsity/model_sparsity": 0.8875412365752459, "compression_loss": 105.39976501464844, "distillation_loss": 4.938938140869141, "epoch": 4.52, "learning_rate": 3.044989198835353e-05, "loss": 110.0505, "step": 5346, "task_loss": 2.9246609210968018 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9958675681922221, "compression/movement_sparsity/importance_threshold": -2.894245654171991e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.919059203206132, "compression/movement_sparsity/model_sparsity": 0.8874866922191872, "compression_loss": 105.40148162841797, "distillation_loss": 4.814578056335449, "epoch": 4.52, "learning_rate": 3.044519582981122e-05, "loss": 109.8968, "step": 5347, "task_loss": 2.663323163986206 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9958892796983647, "compression/movement_sparsity/importance_threshold": -2.8790394910162623e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.918997412169443, "compression/movement_sparsity/model_sparsity": 0.8874270238946999, "compression_loss": 105.40314483642578, "distillation_loss": 3.3414509296417236, "epoch": 4.52, "learning_rate": 3.0440499671268903e-05, "loss": 109.0747, "step": 5348, "task_loss": 2.485527515411377 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9959109150238624, "compression/movement_sparsity/importance_threshold": -2.8638866827641017e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9190376920077169, "compression/movement_sparsity/model_sparsity": 0.8874659199966138, "compression_loss": 105.4048080444336, "distillation_loss": 4.4932861328125, "epoch": 4.52, "learning_rate": 3.043580351272659e-05, "loss": 109.1396, "step": 5349, "task_loss": 3.003786087036133 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9959324743026002, "compression/movement_sparsity/importance_threshold": -2.848787135645986e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9190383597611046, "compression/movement_sparsity/model_sparsity": 0.8874665648106183, "compression_loss": 105.40646362304688, "distillation_loss": 4.153212547302246, "epoch": 4.52, "learning_rate": 3.043110735418428e-05, "loss": 109.2598, "step": 5350, "task_loss": 3.814171075820923 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9959539576684633, "compression/movement_sparsity/importance_threshold": -2.833740755892305e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.919076624415048, "compression/movement_sparsity/model_sparsity": 0.887503514955983, "compression_loss": 105.40805053710938, "distillation_loss": 3.95697283744812, "epoch": 4.52, "learning_rate": 3.042641119564197e-05, "loss": 108.8114, "step": 5351, "task_loss": 1.6072574853897095 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9959753652553368, "compression/movement_sparsity/importance_threshold": -2.8187474497333616e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9190748834865732, "compression/movement_sparsity/model_sparsity": 0.887501833833757, "compression_loss": 105.4096450805664, "distillation_loss": 4.411128997802734, "epoch": 4.52, "learning_rate": 3.042171503709965e-05, "loss": 110.1013, "step": 5352, "task_loss": 1.7729027271270752 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9959966971971058, "compression/movement_sparsity/importance_threshold": -2.8038071233996327e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9190943556523226, "compression/movement_sparsity/model_sparsity": 0.8875206370707095, "compression_loss": 105.41127014160156, "distillation_loss": 3.273794174194336, "epoch": 4.52, "learning_rate": 3.041701887855734e-05, "loss": 109.4814, "step": 5353, "task_loss": 1.678481101989746 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9960179536276556, "compression/movement_sparsity/importance_threshold": -2.788919683121508e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9191358040590248, "compression/movement_sparsity/model_sparsity": 0.8875606615971313, "compression_loss": 105.41287231445312, "distillation_loss": 4.126544952392578, "epoch": 4.53, "learning_rate": 3.041232272001503e-05, "loss": 109.3074, "step": 5354, "task_loss": 3.0084991455078125 }, { "compression/movement_sparsity/importance_regularization_factor": 0.996039134680871, "compression/movement_sparsity/importance_threshold": -2.7740850351294644e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9190977063434282, "compression/movement_sparsity/model_sparsity": 0.8875238726552678, "compression_loss": 105.41444396972656, "distillation_loss": 3.646207809448242, "epoch": 4.53, "learning_rate": 3.0407626561472717e-05, "loss": 109.6451, "step": 5355, "task_loss": 2.360541582107544 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9960602404906376, "compression/movement_sparsity/importance_threshold": -2.759303085653631e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9191141259222628, "compression/movement_sparsity/model_sparsity": 0.8875397281710569, "compression_loss": 105.41600799560547, "distillation_loss": 4.066474437713623, "epoch": 4.53, "learning_rate": 3.04029304029304e-05, "loss": 109.2783, "step": 5356, "task_loss": 1.9582717418670654 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9960812711908402, "compression/movement_sparsity/importance_threshold": -2.7445737409246586e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9191511266144369, "compression/movement_sparsity/model_sparsity": 0.8875754577756273, "compression_loss": 105.41756439208984, "distillation_loss": 3.0736052989959717, "epoch": 4.53, "learning_rate": 3.039823424438809e-05, "loss": 108.9415, "step": 5357, "task_loss": 2.2394940853118896 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9961022269153641, "compression/movement_sparsity/importance_threshold": -2.7298969071729365e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9191919788127574, "compression/movement_sparsity/model_sparsity": 0.8876149065752593, "compression_loss": 105.41910552978516, "distillation_loss": 4.717273712158203, "epoch": 4.53, "learning_rate": 3.039353808584578e-05, "loss": 109.6797, "step": 5358, "task_loss": 2.720681667327881 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9961231077980944, "compression/movement_sparsity/importance_threshold": -2.7152724906286813e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9192282879032085, "compression/movement_sparsity/model_sparsity": 0.8876499683367537, "compression_loss": 105.42064666748047, "distillation_loss": 3.7186262607574463, "epoch": 4.53, "learning_rate": 3.038884192730347e-05, "loss": 109.6702, "step": 5359, "task_loss": 2.33891224861145 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9961439139729162, "compression/movement_sparsity/importance_threshold": -2.700700397522543e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9191980243657487, "compression/movement_sparsity/model_sparsity": 0.8876207444449072, "compression_loss": 105.42220306396484, "distillation_loss": 4.532159805297852, "epoch": 4.53, "learning_rate": 3.0384145768761156e-05, "loss": 109.5392, "step": 5360, "task_loss": 2.9750657081604004 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9961646455737146, "compression/movement_sparsity/importance_threshold": -2.686180534084738e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9192018997202304, "compression/movement_sparsity/model_sparsity": 0.8876244866690405, "compression_loss": 105.42373657226562, "distillation_loss": 2.9028866291046143, "epoch": 4.53, "learning_rate": 3.0379449610218842e-05, "loss": 109.3587, "step": 5361, "task_loss": 1.1392909288406372 }, { "compression/movement_sparsity/importance_regularization_factor": 0.996185302734375, "compression/movement_sparsity/importance_threshold": -2.6717128065456563e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9192321990301932, "compression/movement_sparsity/model_sparsity": 0.8876537451044944, "compression_loss": 105.42526245117188, "distillation_loss": 4.899295330047607, "epoch": 4.53, "learning_rate": 3.037475345167653e-05, "loss": 110.1914, "step": 5362, "task_loss": 2.608349084854126 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9962058855887823, "compression/movement_sparsity/importance_threshold": -2.6572971211357745e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.91923675406223, "compression/movement_sparsity/model_sparsity": 0.8876581436571679, "compression_loss": 105.42680358886719, "distillation_loss": 4.600330352783203, "epoch": 4.53, "learning_rate": 3.0370057293134218e-05, "loss": 110.3389, "step": 5363, "task_loss": 2.151409387588501 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9962263942708216, "compression/movement_sparsity/importance_threshold": -2.6429333840854823e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9193021223492098, "compression/movement_sparsity/model_sparsity": 0.8877212663423936, "compression_loss": 105.42825317382812, "distillation_loss": 5.096721649169922, "epoch": 4.53, "learning_rate": 3.0365361134591908e-05, "loss": 109.6241, "step": 5364, "task_loss": 2.582681894302368 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9962468289143782, "compression/movement_sparsity/importance_threshold": -2.6286215016251696e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9193233235192664, "compression/movement_sparsity/model_sparsity": 0.8877417391870362, "compression_loss": 105.42979431152344, "distillation_loss": 3.665910243988037, "epoch": 4.53, "learning_rate": 3.036066497604959e-05, "loss": 108.9696, "step": 5365, "task_loss": 2.107466459274292 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9962671896533372, "compression/movement_sparsity/importance_threshold": -2.6143613799852264e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9193093841673001, "compression/movement_sparsity/model_sparsity": 0.8877282786946924, "compression_loss": 105.43128967285156, "distillation_loss": 5.877031326293945, "epoch": 4.54, "learning_rate": 3.035596881750728e-05, "loss": 109.7657, "step": 5366, "task_loss": 3.0740747451782227 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9962874766215837, "compression/movement_sparsity/importance_threshold": -2.6001529253960425e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9192995348048328, "compression/movement_sparsity/model_sparsity": 0.8877187676881261, "compression_loss": 105.43279266357422, "distillation_loss": 3.9676191806793213, "epoch": 4.54, "learning_rate": 3.035127265896497e-05, "loss": 110.3255, "step": 5367, "task_loss": 1.950934886932373 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9963076899530029, "compression/movement_sparsity/importance_threshold": -2.585996044087921e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9192658132587587, "compression/movement_sparsity/model_sparsity": 0.8876862045808992, "compression_loss": 105.43434143066406, "distillation_loss": 5.508448600769043, "epoch": 4.54, "learning_rate": 3.0346576500422657e-05, "loss": 109.9107, "step": 5368, "task_loss": 3.0187273025512695 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9963278297814798, "compression/movement_sparsity/importance_threshold": -2.571890642291512e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9193044833344017, "compression/movement_sparsity/model_sparsity": 0.8877235462204809, "compression_loss": 105.4358901977539, "distillation_loss": 3.4196581840515137, "epoch": 4.54, "learning_rate": 3.034188034188034e-05, "loss": 109.4535, "step": 5369, "task_loss": 1.3253545761108398 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9963478962408997, "compression/movement_sparsity/importance_threshold": -2.5578366262370318e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9193582851787747, "compression/movement_sparsity/model_sparsity": 0.8877754998059858, "compression_loss": 105.4374008178711, "distillation_loss": 4.777370929718018, "epoch": 4.54, "learning_rate": 3.033718418333803e-05, "loss": 110.0021, "step": 5370, "task_loss": 2.296750068664551 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9963678894651476, "compression/movement_sparsity/importance_threshold": -2.5438339021548705e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9193870820436154, "compression/movement_sparsity/model_sparsity": 0.8878033074099296, "compression_loss": 105.43897247314453, "distillation_loss": 3.4624719619750977, "epoch": 4.54, "learning_rate": 3.033248802479572e-05, "loss": 109.4799, "step": 5371, "task_loss": 2.8789312839508057 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9963878095881089, "compression/movement_sparsity/importance_threshold": -2.5298823762754177e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9193914462889701, "compression/movement_sparsity/model_sparsity": 0.8878075217300304, "compression_loss": 105.4404525756836, "distillation_loss": 4.339964866638184, "epoch": 4.54, "learning_rate": 3.032779186625341e-05, "loss": 110.1581, "step": 5372, "task_loss": 2.1647720336914062 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9964076567436684, "compression/movement_sparsity/importance_threshold": -2.5159819548293237e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9193886083370727, "compression/movement_sparsity/model_sparsity": 0.8878047812705113, "compression_loss": 105.44197082519531, "distillation_loss": 3.2163329124450684, "epoch": 4.54, "learning_rate": 3.0323095707711092e-05, "loss": 109.3896, "step": 5373, "task_loss": 2.8929033279418945 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9964274310657115, "compression/movement_sparsity/importance_threshold": -2.5021325440465446e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.919425406318397, "compression/movement_sparsity/model_sparsity": 0.8878403151279733, "compression_loss": 105.4434585571289, "distillation_loss": 4.213918685913086, "epoch": 4.54, "learning_rate": 3.031839954916878e-05, "loss": 109.7717, "step": 5374, "task_loss": 2.3298094272613525 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9964471326881232, "compression/movement_sparsity/importance_threshold": -2.488334050157817e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9194712666671245, "compression/movement_sparsity/model_sparsity": 0.8878846000326391, "compression_loss": 105.4449462890625, "distillation_loss": 5.185598850250244, "epoch": 4.54, "learning_rate": 3.0313703390626468e-05, "loss": 110.1756, "step": 5375, "task_loss": 2.385098457336426 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9964667617447887, "compression/movement_sparsity/importance_threshold": -2.4745863793934446e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9194594498169975, "compression/movement_sparsity/model_sparsity": 0.8878731891276667, "compression_loss": 105.44644165039062, "distillation_loss": 5.780341625213623, "epoch": 4.54, "learning_rate": 3.0309007232084158e-05, "loss": 109.8744, "step": 5376, "task_loss": 3.3369007110595703 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9964863183695931, "compression/movement_sparsity/importance_threshold": -2.4608894379838167e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9194840732231655, "compression/movement_sparsity/model_sparsity": 0.8878969666440825, "compression_loss": 105.4478530883789, "distillation_loss": 3.264833450317383, "epoch": 4.54, "learning_rate": 3.0304311073541847e-05, "loss": 108.6197, "step": 5377, "task_loss": 1.8578234910964966 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9965058026964216, "compression/movement_sparsity/importance_threshold": -2.4472431321593234e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9195422750853961, "compression/movement_sparsity/model_sparsity": 0.8879531690932956, "compression_loss": 105.44937896728516, "distillation_loss": 2.9610748291015625, "epoch": 4.55, "learning_rate": 3.029961491499953e-05, "loss": 108.8124, "step": 5378, "task_loss": 2.2311654090881348 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9965252148591592, "compression/movement_sparsity/importance_threshold": -2.4336473681504413e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9195850232263707, "compression/movement_sparsity/model_sparsity": 0.887994448704119, "compression_loss": 105.45084381103516, "distillation_loss": 4.63469123840332, "epoch": 4.55, "learning_rate": 3.029491875645722e-05, "loss": 109.8691, "step": 5379, "task_loss": 3.93216609954834 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9965445549916914, "compression/movement_sparsity/importance_threshold": -2.4201020521873e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.919596279640619, "compression/movement_sparsity/model_sparsity": 0.888005318425909, "compression_loss": 105.45220947265625, "distillation_loss": 4.731152534484863, "epoch": 4.55, "learning_rate": 3.0290222597914906e-05, "loss": 109.0632, "step": 5380, "task_loss": 2.890450954437256 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9965638232279028, "compression/movement_sparsity/importance_threshold": -2.406607090500723e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9196367383414075, "compression/movement_sparsity/model_sparsity": 0.88804438724586, "compression_loss": 105.45372009277344, "distillation_loss": 3.114208698272705, "epoch": 4.55, "learning_rate": 3.0285526439372596e-05, "loss": 109.7862, "step": 5381, "task_loss": 2.007270574569702 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9965830197016791, "compression/movement_sparsity/importance_threshold": -2.3931623893206667e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9196493302624309, "compression/movement_sparsity/model_sparsity": 0.888056546595659, "compression_loss": 105.45516204833984, "distillation_loss": 4.088094711303711, "epoch": 4.55, "learning_rate": 3.028083028083028e-05, "loss": 109.4194, "step": 5382, "task_loss": 1.8571776151657104 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9966021445469049, "compression/movement_sparsity/importance_threshold": -2.3797678548779545e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9197189554772567, "compression/movement_sparsity/model_sparsity": 0.8881237799701633, "compression_loss": 105.45657348632812, "distillation_loss": 4.378728866577148, "epoch": 4.55, "learning_rate": 3.027613412228797e-05, "loss": 109.4723, "step": 5383, "task_loss": 3.2587201595306396 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9966211978974657, "compression/movement_sparsity/importance_threshold": -2.3664233934026294e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9197008307424502, "compression/movement_sparsity/model_sparsity": 0.8881062778757556, "compression_loss": 105.4579849243164, "distillation_loss": 3.4675512313842773, "epoch": 4.55, "learning_rate": 3.027143796374566e-05, "loss": 109.6793, "step": 5384, "task_loss": 1.9856019020080566 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9966401798872466, "compression/movement_sparsity/importance_threshold": -2.3531289111253413e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.919753165914204, "compression/movement_sparsity/model_sparsity": 0.8881568151733579, "compression_loss": 105.4593505859375, "distillation_loss": 2.7578558921813965, "epoch": 4.55, "learning_rate": 3.0266741805203348e-05, "loss": 109.4444, "step": 5385, "task_loss": 1.8978077173233032 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9966590906501327, "compression/movement_sparsity/importance_threshold": -2.3398843142763068e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9197703963364379, "compression/movement_sparsity/model_sparsity": 0.8881734536775809, "compression_loss": 105.46074676513672, "distillation_loss": 3.9399633407592773, "epoch": 4.55, "learning_rate": 3.026204564666103e-05, "loss": 109.8548, "step": 5386, "task_loss": 3.3344199657440186 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9966779303200091, "compression/movement_sparsity/importance_threshold": -2.3266895090860024e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9197591637705248, "compression/movement_sparsity/model_sparsity": 0.8881626069848625, "compression_loss": 105.4620590209961, "distillation_loss": 3.4882378578186035, "epoch": 4.55, "learning_rate": 3.0257349488118718e-05, "loss": 110.0376, "step": 5387, "task_loss": 1.383824110031128 }, { "compression/movement_sparsity/importance_regularization_factor": 0.996696699030761, "compression/movement_sparsity/importance_threshold": -2.313544401784818e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9197608331539938, "compression/movement_sparsity/model_sparsity": 0.8881642190198737, "compression_loss": 105.46339416503906, "distillation_loss": 3.0484070777893066, "epoch": 4.55, "learning_rate": 3.0252653329576407e-05, "loss": 109.0192, "step": 5388, "task_loss": 1.2953248023986816 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9967153969162734, "compression/movement_sparsity/importance_threshold": -2.30044889860323e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9197296156831232, "compression/movement_sparsity/model_sparsity": 0.8881340739651636, "compression_loss": 105.46469116210938, "distillation_loss": 5.782516002655029, "epoch": 4.56, "learning_rate": 3.0247957171034097e-05, "loss": 109.5009, "step": 5389, "task_loss": 3.402012348175049 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9967340241104317, "compression/movement_sparsity/importance_threshold": -2.2874029057714552e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9197008188182826, "compression/movement_sparsity/model_sparsity": 0.8881062663612198, "compression_loss": 105.4659423828125, "distillation_loss": 4.935763835906982, "epoch": 4.56, "learning_rate": 3.0243261012491787e-05, "loss": 109.8576, "step": 5390, "task_loss": 4.032351016998291 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9967525807471208, "compression/movement_sparsity/importance_threshold": -2.2744063295200567e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9197104535457323, "compression/movement_sparsity/model_sparsity": 0.8881155701061418, "compression_loss": 105.46715545654297, "distillation_loss": 5.347166061401367, "epoch": 4.56, "learning_rate": 3.023856485394947e-05, "loss": 110.3588, "step": 5391, "task_loss": 2.7985904216766357 }, { "compression/movement_sparsity/importance_regularization_factor": 0.996771066960226, "compression/movement_sparsity/importance_threshold": -2.2614590760793377e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9197123733367217, "compression/movement_sparsity/model_sparsity": 0.8881174239464047, "compression_loss": 105.46841430664062, "distillation_loss": 3.6117000579833984, "epoch": 4.56, "learning_rate": 3.023386869540716e-05, "loss": 109.2145, "step": 5392, "task_loss": 2.4229769706726074 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9967894828836323, "compression/movement_sparsity/importance_threshold": -2.248561051679688e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9196755753553975, "compression/movement_sparsity/model_sparsity": 0.8880818900889428, "compression_loss": 105.46964263916016, "distillation_loss": 3.3416757583618164, "epoch": 4.56, "learning_rate": 3.0229172536864846e-05, "loss": 108.7987, "step": 5393, "task_loss": 1.990061640739441 }, { "compression/movement_sparsity/importance_regularization_factor": 0.996807828651225, "compression/movement_sparsity/importance_threshold": -2.235712162551671e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9197323701658471, "compression/movement_sparsity/model_sparsity": 0.8881367338229321, "compression_loss": 105.47087860107422, "distillation_loss": 3.136685848236084, "epoch": 4.56, "learning_rate": 3.0224476378322535e-05, "loss": 109.2282, "step": 5394, "task_loss": 1.9621330499649048 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9968261043968891, "compression/movement_sparsity/importance_threshold": -2.222912314925416e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9197631106700123, "compression/movement_sparsity/model_sparsity": 0.8881664182962105, "compression_loss": 105.47207641601562, "distillation_loss": 5.968227863311768, "epoch": 4.56, "learning_rate": 3.021978021978022e-05, "loss": 109.9436, "step": 5395, "task_loss": 3.492405652999878 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9968443102545098, "compression/movement_sparsity/importance_threshold": -2.2101614150315738e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9198399857787607, "compression/movement_sparsity/model_sparsity": 0.888240652508478, "compression_loss": 105.47335815429688, "distillation_loss": 4.417186737060547, "epoch": 4.56, "learning_rate": 3.0215084061237908e-05, "loss": 109.2984, "step": 5396, "task_loss": 2.488494396209717 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9968624463579723, "compression/movement_sparsity/importance_threshold": -2.197459369100447e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.919890079206999, "compression/movement_sparsity/model_sparsity": 0.8882890250733507, "compression_loss": 105.47454071044922, "distillation_loss": 3.561026096343994, "epoch": 4.56, "learning_rate": 3.0210387902695598e-05, "loss": 109.5969, "step": 5397, "task_loss": 1.6686204671859741 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9968805128411616, "compression/movement_sparsity/importance_threshold": -2.1848060833623388e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.919880778356243, "compression/movement_sparsity/model_sparsity": 0.888280043735431, "compression_loss": 105.47574615478516, "distillation_loss": 3.9778554439544678, "epoch": 4.56, "learning_rate": 3.0205691744153288e-05, "loss": 109.305, "step": 5398, "task_loss": 1.7907391786575317 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9968985098379629, "compression/movement_sparsity/importance_threshold": -2.1722014640478127e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9197684765454485, "compression/movement_sparsity/model_sparsity": 0.8881715998373181, "compression_loss": 105.47701263427734, "distillation_loss": 4.297873497009277, "epoch": 4.56, "learning_rate": 3.020099558561097e-05, "loss": 109.7092, "step": 5399, "task_loss": 2.272947311401367 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9969164374822614, "compression/movement_sparsity/importance_threshold": -2.1596454173871717e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9197937677050041, "compression/movement_sparsity/model_sparsity": 0.8881960221677383, "compression_loss": 105.47819519042969, "distillation_loss": 3.4612648487091064, "epoch": 4.56, "learning_rate": 3.0196299427068657e-05, "loss": 109.5361, "step": 5400, "task_loss": 1.7764991521835327 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9969342959079422, "compression/movement_sparsity/importance_threshold": -2.1471378496108057e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9198075401186235, "compression/movement_sparsity/model_sparsity": 0.8882093214565809, "compression_loss": 105.47940826416016, "distillation_loss": 4.036114692687988, "epoch": 4.57, "learning_rate": 3.0191603268526347e-05, "loss": 109.9489, "step": 5401, "task_loss": 1.870722770690918 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9969520852488906, "compression/movement_sparsity/importance_threshold": -2.1346786669490178e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.919891879756312, "compression/movement_sparsity/model_sparsity": 0.8882907637682558, "compression_loss": 105.48066711425781, "distillation_loss": 6.202736854553223, "epoch": 4.57, "learning_rate": 3.0186907109984036e-05, "loss": 109.6531, "step": 5402, "task_loss": 2.6868467330932617 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9969698056389914, "compression/movement_sparsity/importance_threshold": -2.122267775632458e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9199438333547014, "compression/movement_sparsity/model_sparsity": 0.8883409326007126, "compression_loss": 105.48190307617188, "distillation_loss": 4.627185821533203, "epoch": 4.57, "learning_rate": 3.018221095144172e-05, "loss": 109.6802, "step": 5403, "task_loss": 2.583956480026245 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9969874572121299, "compression/movement_sparsity/importance_threshold": -2.109905081891343e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9199580708108587, "compression/movement_sparsity/model_sparsity": 0.8883546809564512, "compression_loss": 105.48314666748047, "distillation_loss": 3.526808500289917, "epoch": 4.57, "learning_rate": 3.017751479289941e-05, "loss": 109.0925, "step": 5404, "task_loss": 1.9932243824005127 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9970050401021913, "compression/movement_sparsity/importance_threshold": -2.0975904919561493e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9199677055383084, "compression/movement_sparsity/model_sparsity": 0.8883639847013732, "compression_loss": 105.484375, "distillation_loss": 4.282423973083496, "epoch": 4.57, "learning_rate": 3.01728186343571e-05, "loss": 109.9214, "step": 5405, "task_loss": 3.657771348953247 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9970225544430608, "compression/movement_sparsity/importance_threshold": -2.0853239120571798e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9199280457567516, "compression/movement_sparsity/model_sparsity": 0.8883256873553206, "compression_loss": 105.48556518554688, "distillation_loss": 4.78279972076416, "epoch": 4.57, "learning_rate": 3.0168122475814785e-05, "loss": 109.618, "step": 5406, "task_loss": 2.6205201148986816 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9970400003686233, "compression/movement_sparsity/importance_threshold": -2.073105248424998e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9199251720323514, "compression/movement_sparsity/model_sparsity": 0.8883229123521941, "compression_loss": 105.48680877685547, "distillation_loss": 3.8371617794036865, "epoch": 4.57, "learning_rate": 3.0163426317272475e-05, "loss": 109.7411, "step": 5407, "task_loss": 2.1720006465911865 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9970573780127642, "compression/movement_sparsity/importance_threshold": -2.0609344072897336e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9199604794927211, "compression/movement_sparsity/model_sparsity": 0.8883570068926817, "compression_loss": 105.48800659179688, "distillation_loss": 4.432809829711914, "epoch": 4.57, "learning_rate": 3.0158730158730158e-05, "loss": 109.0317, "step": 5408, "task_loss": 2.487034320831299 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9970746875093683, "compression/movement_sparsity/importance_threshold": -2.0488112948821233e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9199695299359567, "compression/movement_sparsity/model_sparsity": 0.8883657464253498, "compression_loss": 105.4891357421875, "distillation_loss": 3.049424409866333, "epoch": 4.57, "learning_rate": 3.0154034000187847e-05, "loss": 109.3342, "step": 5409, "task_loss": 1.546929121017456 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9970919289923212, "compression/movement_sparsity/importance_threshold": -2.0367358174322968e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.919990969589366, "compression/movement_sparsity/model_sparsity": 0.8883864495607083, "compression_loss": 105.49037170410156, "distillation_loss": 3.317962646484375, "epoch": 4.57, "learning_rate": 3.0149337841645537e-05, "loss": 109.5388, "step": 5410, "task_loss": 0.9532434344291687 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9971091025955078, "compression/movement_sparsity/importance_threshold": -2.0247078811707307e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9200005804684805, "compression/movement_sparsity/model_sparsity": 0.8883957302765587, "compression_loss": 105.49156951904297, "distillation_loss": 4.547045707702637, "epoch": 4.57, "learning_rate": 3.0144641683103224e-05, "loss": 109.2445, "step": 5411, "task_loss": 2.634608268737793 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9971262084528132, "compression/movement_sparsity/importance_threshold": -2.0127273923279017e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9200108948734855, "compression/movement_sparsity/model_sparsity": 0.888405690350021, "compression_loss": 105.49274444580078, "distillation_loss": 3.726978063583374, "epoch": 4.57, "learning_rate": 3.013994552456091e-05, "loss": 108.882, "step": 5412, "task_loss": 2.5920374393463135 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9971432466981225, "compression/movement_sparsity/importance_threshold": -2.0007942571341128e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9199282842401043, "compression/movement_sparsity/model_sparsity": 0.8883259176460365, "compression_loss": 105.49391174316406, "distillation_loss": 4.775929927825928, "epoch": 4.58, "learning_rate": 3.0135249366018596e-05, "loss": 110.1901, "step": 5413, "task_loss": 2.1523897647857666 }, { "compression/movement_sparsity/importance_regularization_factor": 0.997160217465321, "compression/movement_sparsity/importance_threshold": -1.988908381819754e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9199744546171903, "compression/movement_sparsity/model_sparsity": 0.888370501928633, "compression_loss": 105.4951171875, "distillation_loss": 5.30881404876709, "epoch": 4.58, "learning_rate": 3.0130553207476286e-05, "loss": 109.8048, "step": 5414, "task_loss": 3.0027213096618652 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9971771208882938, "compression/movement_sparsity/importance_threshold": -1.9770696726153017e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9199871896282255, "compression/movement_sparsity/model_sparsity": 0.8883827994528615, "compression_loss": 105.49625396728516, "distillation_loss": 3.2653913497924805, "epoch": 4.58, "learning_rate": 3.0125857048933976e-05, "loss": 109.4935, "step": 5415, "task_loss": 1.4934067726135254 }, { "compression/movement_sparsity/importance_regularization_factor": 0.997193957100926, "compression/movement_sparsity/importance_threshold": -1.965278035751146e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9200267182439383, "compression/movement_sparsity/model_sparsity": 0.8884209701390204, "compression_loss": 105.49735260009766, "distillation_loss": 3.640929937362671, "epoch": 4.58, "learning_rate": 3.012116089039166e-05, "loss": 109.3166, "step": 5416, "task_loss": 1.6433173418045044 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9972107262371027, "compression/movement_sparsity/importance_threshold": -1.953533377457503e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9200069837465009, "compression/movement_sparsity/model_sparsity": 0.8884019135822805, "compression_loss": 105.49849700927734, "distillation_loss": 4.611537933349609, "epoch": 4.58, "learning_rate": 3.011646473184935e-05, "loss": 110.2838, "step": 5417, "task_loss": 3.0056958198547363 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9972274284307091, "compression/movement_sparsity/importance_threshold": -1.94183560396511e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9200309036267785, "compression/movement_sparsity/model_sparsity": 0.8884250117410842, "compression_loss": 105.49968719482422, "distillation_loss": 3.729404926300049, "epoch": 4.58, "learning_rate": 3.0111768573307038e-05, "loss": 109.3573, "step": 5418, "task_loss": 2.040515661239624 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9972440638156304, "compression/movement_sparsity/importance_threshold": -1.9301846215040962e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9200482652148563, "compression/movement_sparsity/model_sparsity": 0.8884417769052011, "compression_loss": 105.50084686279297, "distillation_loss": 5.651251792907715, "epoch": 4.58, "learning_rate": 3.0107072414764724e-05, "loss": 109.7857, "step": 5419, "task_loss": 4.400290012359619 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9972606325257516, "compression/movement_sparsity/importance_threshold": -1.9185803363048516e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9200630631068925, "compression/movement_sparsity/model_sparsity": 0.8884560664441221, "compression_loss": 105.50201416015625, "distillation_loss": 4.091880798339844, "epoch": 4.58, "learning_rate": 3.0102376256222407e-05, "loss": 109.3682, "step": 5420, "task_loss": 2.8512072563171387 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9972771346949579, "compression/movement_sparsity/importance_threshold": -1.9070226545979396e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9200387020324123, "compression/movement_sparsity/model_sparsity": 0.8884325422474939, "compression_loss": 105.503173828125, "distillation_loss": 4.016436576843262, "epoch": 4.58, "learning_rate": 3.0097680097680097e-05, "loss": 109.6879, "step": 5421, "task_loss": 2.579033374786377 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9972935704571345, "compression/movement_sparsity/importance_threshold": -1.8955114826136633e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9200772528663791, "compression/movement_sparsity/model_sparsity": 0.8884697687417177, "compression_loss": 105.50439453125, "distillation_loss": 6.383502960205078, "epoch": 4.58, "learning_rate": 3.0092983939137787e-05, "loss": 109.6733, "step": 5422, "task_loss": 2.866666555404663 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9973099399461665, "compression/movement_sparsity/importance_threshold": -1.8840467265824994e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9200537264836336, "compression/movement_sparsity/model_sparsity": 0.888447050562595, "compression_loss": 105.50558471679688, "distillation_loss": 4.0242533683776855, "epoch": 4.58, "learning_rate": 3.0088287780595477e-05, "loss": 110.1094, "step": 5423, "task_loss": 1.5807554721832275 }, { "compression/movement_sparsity/importance_regularization_factor": 0.997326243295939, "compression/movement_sparsity/importance_threshold": -1.872628292734664e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9200336819578376, "compression/movement_sparsity/model_sparsity": 0.8884276946279244, "compression_loss": 105.50672912597656, "distillation_loss": 6.1720781326293945, "epoch": 4.58, "learning_rate": 3.0083591622053163e-05, "loss": 110.3509, "step": 5424, "task_loss": 3.150758743286133 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9973424806403371, "compression/movement_sparsity/importance_threshold": -1.8612560873008076e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9200354228863125, "compression/movement_sparsity/model_sparsity": 0.8884293757501505, "compression_loss": 105.50785064697266, "distillation_loss": 3.480012893676758, "epoch": 4.59, "learning_rate": 3.007889546351085e-05, "loss": 109.1464, "step": 5425, "task_loss": 1.575105905532837 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9973586521132461, "compression/movement_sparsity/importance_threshold": -1.8499300165111462e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.920060392093342, "compression/movement_sparsity/model_sparsity": 0.8884534871881042, "compression_loss": 105.50898742675781, "distillation_loss": 4.788480758666992, "epoch": 4.59, "learning_rate": 3.0074199304968536e-05, "loss": 109.9181, "step": 5426, "task_loss": 2.152116060256958 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9973747578485511, "compression/movement_sparsity/importance_threshold": -1.83864998659607e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9201151001744553, "compression/movement_sparsity/model_sparsity": 0.8885063158783295, "compression_loss": 105.51004791259766, "distillation_loss": 5.405692100524902, "epoch": 4.59, "learning_rate": 3.0069503146426225e-05, "loss": 110.1407, "step": 5427, "task_loss": 2.615222692489624 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9973907979801372, "compression/movement_sparsity/importance_threshold": -1.8274159037860553e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9201977942770101, "compression/movement_sparsity/model_sparsity": 0.8885861691840646, "compression_loss": 105.51116180419922, "distillation_loss": 4.726553916931152, "epoch": 4.59, "learning_rate": 3.0064806987883915e-05, "loss": 109.2017, "step": 5428, "task_loss": 2.1050519943237305 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9974067726418895, "compression/movement_sparsity/importance_threshold": -1.8162276743114922e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9203000440144876, "compression/movement_sparsity/model_sparsity": 0.8886849063285028, "compression_loss": 105.51224517822266, "distillation_loss": 4.408090591430664, "epoch": 4.59, "learning_rate": 3.0060110829341598e-05, "loss": 109.7134, "step": 5429, "task_loss": 2.938549757003784 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9974226819676932, "compression/movement_sparsity/importance_threshold": -1.8050852044026837e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9202714260121615, "compression/movement_sparsity/model_sparsity": 0.888657271442596, "compression_loss": 105.5133056640625, "distillation_loss": 3.5833630561828613, "epoch": 4.59, "learning_rate": 3.0055414670799288e-05, "loss": 109.8828, "step": 5430, "task_loss": 2.7433745861053467 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9974385260914334, "compression/movement_sparsity/importance_threshold": -1.7939884002901932e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9202721653105549, "compression/movement_sparsity/model_sparsity": 0.8886579853438152, "compression_loss": 105.51435089111328, "distillation_loss": 5.136394500732422, "epoch": 4.59, "learning_rate": 3.0050718512256974e-05, "loss": 109.936, "step": 5431, "task_loss": 1.9441114664077759 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9974543051469952, "compression/movement_sparsity/importance_threshold": -1.7829371682043238e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9202688146194492, "compression/movement_sparsity/model_sparsity": 0.888654749759257, "compression_loss": 105.51541137695312, "distillation_loss": 5.497203826904297, "epoch": 4.59, "learning_rate": 3.0046022353714664e-05, "loss": 110.1104, "step": 5432, "task_loss": 3.15281343460083 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9974700192682638, "compression/movement_sparsity/importance_threshold": -1.7719314143754654e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9202381456602898, "compression/movement_sparsity/model_sparsity": 0.8886251343731935, "compression_loss": 105.5165023803711, "distillation_loss": 5.259067058563232, "epoch": 4.59, "learning_rate": 3.0041326195172347e-05, "loss": 109.7755, "step": 5433, "task_loss": 3.562157154083252 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9974856685891245, "compression/movement_sparsity/importance_threshold": -1.760971045034008e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9202509641404983, "compression/movement_sparsity/model_sparsity": 0.8886375124991726, "compression_loss": 105.51759338378906, "distillation_loss": 4.971002578735352, "epoch": 4.59, "learning_rate": 3.0036630036630036e-05, "loss": 110.0231, "step": 5434, "task_loss": 1.891106128692627 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9975012532434622, "compression/movement_sparsity/importance_threshold": -1.7500559664104277e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9202753490633137, "compression/movement_sparsity/model_sparsity": 0.8886610597248724, "compression_loss": 105.51864624023438, "distillation_loss": 5.3290557861328125, "epoch": 4.59, "learning_rate": 3.0031933878087726e-05, "loss": 110.5705, "step": 5435, "task_loss": 3.0382227897644043 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9975167733651621, "compression/movement_sparsity/importance_threshold": -1.7391860847350282e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.920275205973302, "compression/movement_sparsity/model_sparsity": 0.8886609215504429, "compression_loss": 105.51971435546875, "distillation_loss": 4.268392562866211, "epoch": 4.59, "learning_rate": 3.0027237719545416e-05, "loss": 109.5617, "step": 5436, "task_loss": 2.192270040512085 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9975322290881093, "compression/movement_sparsity/importance_threshold": -1.728361306238286e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9202429987965176, "compression/movement_sparsity/model_sparsity": 0.8886298207892618, "compression_loss": 105.52078247070312, "distillation_loss": 4.538769721984863, "epoch": 4.6, "learning_rate": 3.0022541561003102e-05, "loss": 109.4774, "step": 5437, "task_loss": 3.0774691104888916 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9975476205461891, "compression/movement_sparsity/importance_threshold": -1.717581537150504e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9202198182146335, "compression/movement_sparsity/model_sparsity": 0.8886074365316772, "compression_loss": 105.52180480957031, "distillation_loss": 4.1216230392456055, "epoch": 4.6, "learning_rate": 3.001784540246079e-05, "loss": 109.2754, "step": 5438, "task_loss": 3.4140119552612305 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9975629478732865, "compression/movement_sparsity/importance_threshold": -1.706846683702159e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9201922256907241, "compression/movement_sparsity/model_sparsity": 0.8885807918958486, "compression_loss": 105.52288818359375, "distillation_loss": 3.224083423614502, "epoch": 4.6, "learning_rate": 3.0013149243918475e-05, "loss": 109.5837, "step": 5439, "task_loss": 1.856462836265564 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9975782112032866, "compression/movement_sparsity/importance_threshold": -1.6961566521237273e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9202125086998727, "compression/movement_sparsity/model_sparsity": 0.8886003781212352, "compression_loss": 105.5239028930664, "distillation_loss": 5.161131858825684, "epoch": 4.6, "learning_rate": 3.0008453085376165e-05, "loss": 109.7338, "step": 5440, "task_loss": 3.1608545780181885 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9975934106700748, "compression/movement_sparsity/importance_threshold": -1.685511348645339e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9202338291116057, "compression/movement_sparsity/model_sparsity": 0.8886209661112358, "compression_loss": 105.52497100830078, "distillation_loss": 3.9445438385009766, "epoch": 4.6, "learning_rate": 3.0003756926833854e-05, "loss": 109.463, "step": 5441, "task_loss": 3.3866138458251953 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9976085464075359, "compression/movement_sparsity/importance_threshold": -1.674910679497644e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9202292025345629, "compression/movement_sparsity/model_sparsity": 0.8886164984713475, "compression_loss": 105.52599334716797, "distillation_loss": 5.2165207862854, "epoch": 4.6, "learning_rate": 2.9999060768291537e-05, "loss": 109.5505, "step": 5442, "task_loss": 2.4473001956939697 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9976236185495553, "compression/movement_sparsity/importance_threshold": -1.664354550911032e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.920282837440589, "compression/movement_sparsity/model_sparsity": 0.8886682908533513, "compression_loss": 105.52703094482422, "distillation_loss": 4.258654594421387, "epoch": 4.6, "learning_rate": 2.9994364609749227e-05, "loss": 109.1194, "step": 5443, "task_loss": 1.7345026731491089 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9976386272300181, "compression/movement_sparsity/importance_threshold": -1.6538428691156327e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9202937719023111, "compression/movement_sparsity/model_sparsity": 0.888678849682675, "compression_loss": 105.52809143066406, "distillation_loss": 4.340701580047607, "epoch": 4.6, "learning_rate": 2.9989668451206913e-05, "loss": 109.8629, "step": 5444, "task_loss": 2.546527862548828 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9976535725828093, "compression/movement_sparsity/importance_threshold": -1.6433755403421832e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9202164794476955, "compression/movement_sparsity/model_sparsity": 0.8886042124616548, "compression_loss": 105.52909088134766, "distillation_loss": 3.998823881149292, "epoch": 4.6, "learning_rate": 2.9984972292664603e-05, "loss": 109.7423, "step": 5445, "task_loss": 1.9490429162979126 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9976684547418141, "compression/movement_sparsity/importance_threshold": -1.6329524708208996e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9202383960678102, "compression/movement_sparsity/model_sparsity": 0.8886253761784451, "compression_loss": 105.53009033203125, "distillation_loss": 4.013250350952148, "epoch": 4.6, "learning_rate": 2.9980276134122286e-05, "loss": 109.4206, "step": 5446, "task_loss": 3.065840244293213 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9976832738409177, "compression/movement_sparsity/importance_threshold": -1.622573566782172e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9202834574973061, "compression/movement_sparsity/model_sparsity": 0.8886688896092126, "compression_loss": 105.53105163574219, "distillation_loss": 3.6554760932922363, "epoch": 4.6, "learning_rate": 2.9975579975579976e-05, "loss": 109.505, "step": 5447, "task_loss": 2.0333545207977295 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9976980300140053, "compression/movement_sparsity/importance_threshold": -1.6122387344564768e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9202830759239418, "compression/movement_sparsity/model_sparsity": 0.8886685211440672, "compression_loss": 105.53204345703125, "distillation_loss": 3.490699052810669, "epoch": 4.6, "learning_rate": 2.9970883817037666e-05, "loss": 109.3694, "step": 5448, "task_loss": 1.384548306465149 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9977127233949618, "compression/movement_sparsity/importance_threshold": -1.601947880074204e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9203423509612596, "compression/movement_sparsity/model_sparsity": 0.8887257599015019, "compression_loss": 105.53298950195312, "distillation_loss": 4.419117450714111, "epoch": 4.61, "learning_rate": 2.9966187658495355e-05, "loss": 109.4583, "step": 5449, "task_loss": 2.294048547744751 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9977273541176725, "compression/movement_sparsity/importance_threshold": -1.5917009098656565e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9203840855479851, "compression/movement_sparsity/model_sparsity": 0.8887660607767828, "compression_loss": 105.53385925292969, "distillation_loss": 4.980733871459961, "epoch": 4.61, "learning_rate": 2.9961491499953038e-05, "loss": 109.7196, "step": 5450, "task_loss": 2.9670205116271973 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9977419223160227, "compression/movement_sparsity/importance_threshold": -1.581497730061398e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9203977148715929, "compression/movement_sparsity/model_sparsity": 0.888779221891196, "compression_loss": 105.53483581542969, "distillation_loss": 4.469579696655273, "epoch": 4.61, "learning_rate": 2.9956795341410725e-05, "loss": 109.5081, "step": 5451, "task_loss": 2.549023151397705 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9977564281238972, "compression/movement_sparsity/importance_threshold": -1.5713382468916445e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.920440641875082, "compression/movement_sparsity/model_sparsity": 0.8888206742200563, "compression_loss": 105.53581237792969, "distillation_loss": 4.998918533325195, "epoch": 4.61, "learning_rate": 2.9952099182868414e-05, "loss": 110.3311, "step": 5452, "task_loss": 2.6924219131469727 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9977708716751814, "compression/movement_sparsity/importance_threshold": -1.561222366586873e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9204366592030917, "compression/movement_sparsity/model_sparsity": 0.8888168283651009, "compression_loss": 105.53672790527344, "distillation_loss": 4.141615867614746, "epoch": 4.61, "learning_rate": 2.9947403024326104e-05, "loss": 110.0063, "step": 5453, "task_loss": 2.9852094650268555 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9977852531037603, "compression/movement_sparsity/importance_threshold": -1.551149995377473e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9205023852151005, "compression/movement_sparsity/model_sparsity": 0.8888802964864004, "compression_loss": 105.53768157958984, "distillation_loss": 4.295146942138672, "epoch": 4.61, "learning_rate": 2.9942706865783794e-05, "loss": 109.9629, "step": 5454, "task_loss": 3.4808645248413086 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9977995725435191, "compression/movement_sparsity/importance_threshold": -1.5411210394939215e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9205119483975445, "compression/movement_sparsity/model_sparsity": 0.8888895311441076, "compression_loss": 105.53864288330078, "distillation_loss": 5.221095085144043, "epoch": 4.61, "learning_rate": 2.9938010707241477e-05, "loss": 110.4152, "step": 5455, "task_loss": 3.114377975463867 }, { "compression/movement_sparsity/importance_regularization_factor": 0.997813830128343, "compression/movement_sparsity/importance_threshold": -1.5311354051665213e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9205163007187316, "compression/movement_sparsity/model_sparsity": 0.8888937339496726, "compression_loss": 105.53961944580078, "distillation_loss": 4.532803535461426, "epoch": 4.61, "learning_rate": 2.9933314548699166e-05, "loss": 109.595, "step": 5456, "task_loss": 1.9667948484420776 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9978280259921171, "compression/movement_sparsity/importance_threshold": -1.5211929986256624e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9205378238413143, "compression/movement_sparsity/model_sparsity": 0.8889145176867818, "compression_loss": 105.54054260253906, "distillation_loss": 3.1912975311279297, "epoch": 4.61, "learning_rate": 2.9928618390156853e-05, "loss": 109.2794, "step": 5457, "task_loss": 2.0149428844451904 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9978421602687264, "compression/movement_sparsity/importance_threshold": -1.5112937261018214e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9205403279165179, "compression/movement_sparsity/model_sparsity": 0.8889169357392986, "compression_loss": 105.5415267944336, "distillation_loss": 3.9845850467681885, "epoch": 4.61, "learning_rate": 2.9923922231614543e-05, "loss": 109.2521, "step": 5458, "task_loss": 1.4609997272491455 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9978562330920562, "compression/movement_sparsity/importance_threshold": -1.5014374938253014e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9205084665405948, "compression/movement_sparsity/model_sparsity": 0.8888861688996557, "compression_loss": 105.54244232177734, "distillation_loss": 6.106750011444092, "epoch": 4.61, "learning_rate": 2.9919226073072225e-05, "loss": 110.6262, "step": 5459, "task_loss": 4.572825908660889 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9978702445959916, "compression/movement_sparsity/importance_threshold": -1.491624208026579e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9205084903889301, "compression/movement_sparsity/model_sparsity": 0.8888861919287272, "compression_loss": 105.54338073730469, "distillation_loss": 4.5230889320373535, "epoch": 4.61, "learning_rate": 2.9914529914529915e-05, "loss": 109.3057, "step": 5460, "task_loss": 1.55849027633667 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9978841949144177, "compression/movement_sparsity/importance_threshold": -1.4818537749360441e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9204459362055124, "compression/movement_sparsity/model_sparsity": 0.8888257866739491, "compression_loss": 105.54432678222656, "distillation_loss": 2.8002400398254395, "epoch": 4.62, "learning_rate": 2.9909833755987605e-05, "loss": 108.3748, "step": 5461, "task_loss": 1.3882176876068115 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9978980841812197, "compression/movement_sparsity/importance_threshold": -1.4721261007840866e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9204676858872802, "compression/movement_sparsity/model_sparsity": 0.8888467891872384, "compression_loss": 105.54521179199219, "distillation_loss": 5.3518571853637695, "epoch": 4.62, "learning_rate": 2.9905137597445295e-05, "loss": 110.0048, "step": 5462, "task_loss": 3.5659778118133545 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9979119125302828, "compression/movement_sparsity/importance_threshold": -1.4624410918010096e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9205679085162596, "compression/movement_sparsity/model_sparsity": 0.8889435688605914, "compression_loss": 105.54608154296875, "distillation_loss": 4.561398029327393, "epoch": 4.62, "learning_rate": 2.9900441438902978e-05, "loss": 109.0262, "step": 5463, "task_loss": 2.1509876251220703 }, { "compression/movement_sparsity/importance_regularization_factor": 0.997925680095492, "compression/movement_sparsity/importance_threshold": -1.4527986542173764e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9206102512355345, "compression/movement_sparsity/model_sparsity": 0.8889844569771979, "compression_loss": 105.54691314697266, "distillation_loss": 4.082301616668701, "epoch": 4.62, "learning_rate": 2.9895745280360664e-05, "loss": 109.388, "step": 5464, "task_loss": 2.8342440128326416 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9979393870107324, "compression/movement_sparsity/importance_threshold": -1.4431986942634902e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9205886923404489, "compression/movement_sparsity/model_sparsity": 0.8889636386964813, "compression_loss": 105.5477294921875, "distillation_loss": 4.635745525360107, "epoch": 4.62, "learning_rate": 2.9891049121818354e-05, "loss": 109.8422, "step": 5465, "task_loss": 1.616660714149475 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9979530334098894, "compression/movement_sparsity/importance_threshold": -1.4336411181697407e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9206219846164883, "compression/movement_sparsity/model_sparsity": 0.8889957872804197, "compression_loss": 105.54859161376953, "distillation_loss": 3.7916207313537598, "epoch": 4.62, "learning_rate": 2.9886352963276043e-05, "loss": 110.2712, "step": 5466, "task_loss": 1.5971463918685913 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9979666194268478, "compression/movement_sparsity/importance_threshold": -1.4241258321666048e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9206097742688292, "compression/movement_sparsity/model_sparsity": 0.8889839963957661, "compression_loss": 105.54937744140625, "distillation_loss": 5.486489772796631, "epoch": 4.62, "learning_rate": 2.9881656804733733e-05, "loss": 110.2393, "step": 5467, "task_loss": 3.099483013153076 }, { "compression/movement_sparsity/importance_regularization_factor": 0.997980145195493, "compression/movement_sparsity/importance_threshold": -1.4146527424843854e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9206345884616793, "compression/movement_sparsity/model_sparsity": 0.8890079581447545, "compression_loss": 105.55021667480469, "distillation_loss": 4.403571128845215, "epoch": 4.62, "learning_rate": 2.9876960646191416e-05, "loss": 110.0653, "step": 5468, "task_loss": 2.2342982292175293 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9979936108497102, "compression/movement_sparsity/importance_threshold": -1.4052217553534724e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9206632303123407, "compression/movement_sparsity/model_sparsity": 0.889035616059733, "compression_loss": 105.55101013183594, "distillation_loss": 5.160192489624023, "epoch": 4.62, "learning_rate": 2.9872264487649106e-05, "loss": 109.8276, "step": 5469, "task_loss": 2.0440902709960938 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9980070165233843, "compression/movement_sparsity/importance_threshold": -1.3958327770042557e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9206772889059833, "compression/movement_sparsity/model_sparsity": 0.8890491916974348, "compression_loss": 105.55180358886719, "distillation_loss": 3.341071605682373, "epoch": 4.62, "learning_rate": 2.9867568329106792e-05, "loss": 109.2989, "step": 5470, "task_loss": 2.2177059650421143 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9980203623504005, "compression/movement_sparsity/importance_threshold": -1.3864857136673854e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9207051199132454, "compression/movement_sparsity/model_sparsity": 0.8890760666239792, "compression_loss": 105.55258178710938, "distillation_loss": 4.070933818817139, "epoch": 4.62, "learning_rate": 2.9862872170564482e-05, "loss": 109.2709, "step": 5471, "task_loss": 1.638122320175171 }, { "compression/movement_sparsity/importance_regularization_factor": 0.998033648464644, "compression/movement_sparsity/importance_threshold": -1.3771804715729044e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.920764538040575, "compression/movement_sparsity/model_sparsity": 0.8891334435558435, "compression_loss": 105.5533676147461, "distillation_loss": 4.104494094848633, "epoch": 4.63, "learning_rate": 2.9858176012022165e-05, "loss": 109.1266, "step": 5472, "task_loss": 2.204360246658325 }, { "compression/movement_sparsity/importance_regularization_factor": 0.998046875, "compression/movement_sparsity/importance_threshold": -1.367916956951376e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9207605434444169, "compression/movement_sparsity/model_sparsity": 0.8891295861863522, "compression_loss": 105.55414581298828, "distillation_loss": 4.091855525970459, "epoch": 4.63, "learning_rate": 2.9853479853479855e-05, "loss": 109.6253, "step": 5473, "task_loss": 3.016900062561035 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9980600420903535, "compression/movement_sparsity/importance_threshold": -1.358695076033277e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9207917132186171, "compression/movement_sparsity/model_sparsity": 0.8891596851829192, "compression_loss": 105.554931640625, "distillation_loss": 3.9890122413635254, "epoch": 4.63, "learning_rate": 2.9848783694937544e-05, "loss": 110.0982, "step": 5474, "task_loss": 2.9276719093322754 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9980731498695897, "compression/movement_sparsity/importance_threshold": -1.3495147350488235e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9207610084869547, "compression/movement_sparsity/model_sparsity": 0.8891300352532482, "compression_loss": 105.55571746826172, "distillation_loss": 3.0866899490356445, "epoch": 4.63, "learning_rate": 2.984408753639523e-05, "loss": 108.9469, "step": 5475, "task_loss": 1.4193649291992188 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9980861984715937, "compression/movement_sparsity/importance_threshold": -1.340375840228579e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9207965067340067, "compression/movement_sparsity/model_sparsity": 0.8891643140263086, "compression_loss": 105.55652618408203, "distillation_loss": 4.663867950439453, "epoch": 4.63, "learning_rate": 2.9839391377852917e-05, "loss": 109.8642, "step": 5476, "task_loss": 2.5201027393341064 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9980991880302509, "compression/movement_sparsity/importance_threshold": -1.3312782978027599e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9208260428972408, "compression/movement_sparsity/model_sparsity": 0.8891928355314717, "compression_loss": 105.5572509765625, "distillation_loss": 3.7670371532440186, "epoch": 4.63, "learning_rate": 2.9834695219310603e-05, "loss": 109.7349, "step": 5477, "task_loss": 2.95670485496521 }, { "compression/movement_sparsity/importance_regularization_factor": 0.998112118679446, "compression/movement_sparsity/importance_threshold": -1.3222220140019295e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.920875480496259, "compression/movement_sparsity/model_sparsity": 0.8892405747968758, "compression_loss": 105.55802154541016, "distillation_loss": 3.4481446743011475, "epoch": 4.63, "learning_rate": 2.9829999060768293e-05, "loss": 109.1789, "step": 5478, "task_loss": 2.270829916000366 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9981249905530645, "compression/movement_sparsity/importance_threshold": -1.3132068950564776e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9208802501633134, "compression/movement_sparsity/model_sparsity": 0.8892451806111936, "compression_loss": 105.55878448486328, "distillation_loss": 3.694906234741211, "epoch": 4.63, "learning_rate": 2.9825302902225983e-05, "loss": 109.2548, "step": 5479, "task_loss": 2.381474018096924 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9981378037849913, "compression/movement_sparsity/importance_threshold": -1.3042328471967075e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9209064356354417, "compression/movement_sparsity/model_sparsity": 0.8892704665317984, "compression_loss": 105.55953216552734, "distillation_loss": 5.149961948394775, "epoch": 4.63, "learning_rate": 2.9820606743683666e-05, "loss": 109.2785, "step": 5480, "task_loss": 2.5064005851745605 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9981505585091117, "compression/movement_sparsity/importance_threshold": -1.2952997766530958e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9209269094312725, "compression/movement_sparsity/model_sparsity": 0.8892902369897576, "compression_loss": 105.56021881103516, "distillation_loss": 4.603871822357178, "epoch": 4.63, "learning_rate": 2.9815910585141355e-05, "loss": 109.3411, "step": 5481, "task_loss": 2.3995096683502197 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9981632548593108, "compression/movement_sparsity/importance_threshold": -1.2864075896559454e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9209023456459426, "compression/movement_sparsity/model_sparsity": 0.8892665170460209, "compression_loss": 105.56086730957031, "distillation_loss": 2.985311508178711, "epoch": 4.63, "learning_rate": 2.9811214426599042e-05, "loss": 108.9121, "step": 5482, "task_loss": 1.6399996280670166 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9981758929694737, "compression/movement_sparsity/importance_threshold": -1.2775561924357332e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9209357929361612, "compression/movement_sparsity/model_sparsity": 0.8892988153189246, "compression_loss": 105.56161499023438, "distillation_loss": 3.9357495307922363, "epoch": 4.63, "learning_rate": 2.980651826805673e-05, "loss": 109.5999, "step": 5483, "task_loss": 2.5850868225097656 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9981884729734857, "compression/movement_sparsity/importance_threshold": -1.2687454912228488e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9208624116085301, "compression/movement_sparsity/model_sparsity": 0.889227954865645, "compression_loss": 105.56236267089844, "distillation_loss": 4.823022365570068, "epoch": 4.64, "learning_rate": 2.980182210951442e-05, "loss": 110.5117, "step": 5484, "task_loss": 2.3073902130126953 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9982009950052316, "compression/movement_sparsity/importance_threshold": -1.259975392247769e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9208684213890186, "compression/movement_sparsity/model_sparsity": 0.8892337581916855, "compression_loss": 105.56307983398438, "distillation_loss": 5.302607536315918, "epoch": 4.64, "learning_rate": 2.9797125950972104e-05, "loss": 110.5084, "step": 5485, "task_loss": 2.736426830291748 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9982134591985969, "compression/movement_sparsity/importance_threshold": -1.25124580174071e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9208960019887604, "compression/movement_sparsity/model_sparsity": 0.8892603913129782, "compression_loss": 105.56381225585938, "distillation_loss": 4.095168590545654, "epoch": 4.64, "learning_rate": 2.9792429792429794e-05, "loss": 109.5123, "step": 5486, "task_loss": 2.788638114929199 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9982258656874666, "compression/movement_sparsity/importance_threshold": -1.2425566259321487e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9209203272907376, "compression/movement_sparsity/model_sparsity": 0.8892838809659991, "compression_loss": 105.56459045410156, "distillation_loss": 6.452739238739014, "epoch": 4.64, "learning_rate": 2.9787733633887484e-05, "loss": 110.244, "step": 5487, "task_loss": 3.432340383529663 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9982382146057258, "compression/movement_sparsity/importance_threshold": -1.2339077710524747e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9209508889323883, "compression/movement_sparsity/model_sparsity": 0.8893133927212405, "compression_loss": 105.56527709960938, "distillation_loss": 2.6238038539886475, "epoch": 4.64, "learning_rate": 2.978303747534517e-05, "loss": 109.0877, "step": 5488, "task_loss": 1.5291496515274048 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9982505060872596, "compression/movement_sparsity/importance_threshold": -1.2252991433322515e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9210213846114514, "compression/movement_sparsity/model_sparsity": 0.8893814666568578, "compression_loss": 105.5660629272461, "distillation_loss": 4.410998344421387, "epoch": 4.64, "learning_rate": 2.9778341316802856e-05, "loss": 109.4212, "step": 5489, "task_loss": 2.4471828937530518 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9982627402659533, "compression/movement_sparsity/importance_threshold": -1.2167306490016086e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9210750195174776, "compression/movement_sparsity/model_sparsity": 0.8894332590388616, "compression_loss": 105.56681823730469, "distillation_loss": 4.909655570983887, "epoch": 4.64, "learning_rate": 2.9773645158260543e-05, "loss": 109.1968, "step": 5490, "task_loss": 3.52805495262146 }, { "compression/movement_sparsity/importance_regularization_factor": 0.998274917275692, "compression/movement_sparsity/importance_threshold": -1.2082021942911095e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9210568113134976, "compression/movement_sparsity/model_sparsity": 0.8894156763427034, "compression_loss": 105.56758117675781, "distillation_loss": 2.9336447715759277, "epoch": 4.64, "learning_rate": 2.9768948999718232e-05, "loss": 109.1259, "step": 5491, "task_loss": 1.8460140228271484 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9982870372503607, "compression/movement_sparsity/importance_threshold": -1.199713685431144e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9210429077340342, "compression/movement_sparsity/model_sparsity": 0.8894022503939669, "compression_loss": 105.5682601928711, "distillation_loss": 2.950896739959717, "epoch": 4.64, "learning_rate": 2.9764252841175922e-05, "loss": 109.2713, "step": 5492, "task_loss": 1.5536385774612427 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9982991003238446, "compression/movement_sparsity/importance_threshold": -1.1912650286521019e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9210244133500309, "compression/movement_sparsity/model_sparsity": 0.8893843913489496, "compression_loss": 105.5689926147461, "distillation_loss": 5.545666694641113, "epoch": 4.64, "learning_rate": 2.9759556682633605e-05, "loss": 110.0599, "step": 5493, "task_loss": 2.71593976020813 }, { "compression/movement_sparsity/importance_regularization_factor": 0.998311106630029, "compression/movement_sparsity/importance_threshold": -1.1828561301842865e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9210034506633271, "compression/movement_sparsity/model_sparsity": 0.8893641487950228, "compression_loss": 105.5696792602539, "distillation_loss": 4.216726303100586, "epoch": 4.64, "learning_rate": 2.9754860524091295e-05, "loss": 109.9911, "step": 5494, "task_loss": 3.4572525024414062 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9983230563027988, "compression/movement_sparsity/importance_threshold": -1.1744868962581742e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9210672330360113, "compression/movement_sparsity/model_sparsity": 0.8894257400469878, "compression_loss": 105.57041931152344, "distillation_loss": 5.391574382781982, "epoch": 4.64, "learning_rate": 2.975016436554898e-05, "loss": 109.9368, "step": 5495, "task_loss": 2.361868143081665 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9983349494760394, "compression/movement_sparsity/importance_threshold": -1.166157233104155e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9210615213597138, "compression/movement_sparsity/model_sparsity": 0.8894202245843422, "compression_loss": 105.5711669921875, "distillation_loss": 4.1287946701049805, "epoch": 4.65, "learning_rate": 2.974546820700667e-05, "loss": 110.1577, "step": 5496, "task_loss": 2.693920850753784 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9983467862836357, "compression/movement_sparsity/importance_threshold": -1.1578670469526188e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9211191747102332, "compression/movement_sparsity/model_sparsity": 0.8894758973649088, "compression_loss": 105.57190704345703, "distillation_loss": 4.5669169425964355, "epoch": 4.65, "learning_rate": 2.9740772048464354e-05, "loss": 109.6205, "step": 5497, "task_loss": 3.3176538944244385 }, { "compression/movement_sparsity/importance_regularization_factor": 0.998358566859473, "compression/movement_sparsity/importance_threshold": -1.1496162440339554e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9211239920739581, "compression/movement_sparsity/model_sparsity": 0.8894805492373699, "compression_loss": 105.57260131835938, "distillation_loss": 4.57017707824707, "epoch": 4.65, "learning_rate": 2.9736075889922044e-05, "loss": 109.625, "step": 5498, "task_loss": 2.720797538757324 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9983702913374364, "compression/movement_sparsity/importance_threshold": -1.1414047305786414e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.921157952103385, "compression/movement_sparsity/model_sparsity": 0.8895133426353127, "compression_loss": 105.57333374023438, "distillation_loss": 4.807487964630127, "epoch": 4.65, "learning_rate": 2.9731379731379733e-05, "loss": 110.057, "step": 5499, "task_loss": 2.0919086933135986 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9983819598514109, "compression/movement_sparsity/importance_threshold": -1.1332324128169799e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9211280105184514, "compression/movement_sparsity/model_sparsity": 0.8894844296359325, "compression_loss": 105.573974609375, "distillation_loss": 4.656310558319092, "epoch": 4.65, "learning_rate": 2.9726683572837423e-05, "loss": 109.6831, "step": 5500, "task_loss": 3.0060484409332275 }, { "epoch": 4.65, "eval_accuracy": 0.5844356435643564, "eval_loss": 109.220458984375, "eval_runtime": 227.6601, "eval_samples_per_second": 110.911, "eval_steps_per_second": 0.87, "step": 5500 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9983935725352818, "compression/movement_sparsity/importance_threshold": -1.1250991969794476e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9211420691120941, "compression/movement_sparsity/model_sparsity": 0.8894980052736343, "compression_loss": 105.57464599609375, "distillation_loss": 5.109872817993164, "epoch": 4.65, "learning_rate": 2.972198741429511e-05, "loss": 109.9574, "step": 5501, "task_loss": 3.02851939201355 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9984051295229343, "compression/movement_sparsity/importance_threshold": -1.1170049892963475e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9211321124321181, "compression/movement_sparsity/model_sparsity": 0.889488390636246, "compression_loss": 105.57530212402344, "distillation_loss": 5.3459954261779785, "epoch": 4.65, "learning_rate": 2.9717291255752792e-05, "loss": 110.1277, "step": 5502, "task_loss": 3.3991212844848633 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9984166309482533, "compression/movement_sparsity/importance_threshold": -1.1089496959981564e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9211259834099532, "compression/movement_sparsity/model_sparsity": 0.8894824721648475, "compression_loss": 105.57597351074219, "distillation_loss": 4.142326354980469, "epoch": 4.65, "learning_rate": 2.9712595097210482e-05, "loss": 109.6358, "step": 5503, "task_loss": 2.7353999614715576 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9984280769451241, "compression/movement_sparsity/importance_threshold": -1.1009332233151772e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9211947500847093, "compression/movement_sparsity/model_sparsity": 0.8895488764927747, "compression_loss": 105.57666778564453, "distillation_loss": 4.549707889556885, "epoch": 4.65, "learning_rate": 2.9707898938668172e-05, "loss": 109.9483, "step": 5504, "task_loss": 2.590615749359131 }, { "compression/movement_sparsity/importance_regularization_factor": 0.998439467647432, "compression/movement_sparsity/importance_threshold": -1.0929554774778867e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9212436272478487, "compression/movement_sparsity/model_sparsity": 0.8895960745749965, "compression_loss": 105.57730865478516, "distillation_loss": 3.782606601715088, "epoch": 4.65, "learning_rate": 2.970320278012586e-05, "loss": 109.6767, "step": 5505, "task_loss": 2.520418405532837 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9984508031890618, "compression/movement_sparsity/importance_threshold": -1.0850163647167614e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9212841336453077, "compression/movement_sparsity/model_sparsity": 0.8896351894530905, "compression_loss": 105.5779037475586, "distillation_loss": 3.771148681640625, "epoch": 4.65, "learning_rate": 2.9698506621583544e-05, "loss": 109.2835, "step": 5506, "task_loss": 2.1486878395080566 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9984620837038989, "compression/movement_sparsity/importance_threshold": -1.0771157912620177e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9212520457101996, "compression/movement_sparsity/model_sparsity": 0.8896042038372675, "compression_loss": 105.57848358154297, "distillation_loss": 3.160004138946533, "epoch": 4.65, "learning_rate": 2.9693810463041234e-05, "loss": 109.9779, "step": 5507, "task_loss": 2.5075628757476807 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9984733093258282, "compression/movement_sparsity/importance_threshold": -1.069253663344219e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9212581151115262, "compression/movement_sparsity/model_sparsity": 0.8896100647359868, "compression_loss": 105.5790786743164, "distillation_loss": 4.342315673828125, "epoch": 4.66, "learning_rate": 2.968911430449892e-05, "loss": 109.3495, "step": 5508, "task_loss": 2.3489034175872803 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9984844801887351, "compression/movement_sparsity/importance_threshold": -1.0614298871935816e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9211778177666664, "compression/movement_sparsity/model_sparsity": 0.8895325258519464, "compression_loss": 105.57966613769531, "distillation_loss": 3.7558228969573975, "epoch": 4.66, "learning_rate": 2.968441814595661e-05, "loss": 109.2396, "step": 5509, "task_loss": 2.336001396179199 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9984955964265047, "compression/movement_sparsity/importance_threshold": -1.053644369040669e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9212110861943704, "compression/movement_sparsity/model_sparsity": 0.8895646514068132, "compression_loss": 105.5802001953125, "distillation_loss": 4.306859970092773, "epoch": 4.66, "learning_rate": 2.9679721987414293e-05, "loss": 109.8347, "step": 5510, "task_loss": 2.8960933685302734 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9985066581730219, "compression/movement_sparsity/importance_threshold": -1.0458970151158709e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9211811923061073, "compression/movement_sparsity/model_sparsity": 0.8895357844655762, "compression_loss": 105.58077239990234, "distillation_loss": 4.569491386413574, "epoch": 4.66, "learning_rate": 2.9675025828871983e-05, "loss": 109.4031, "step": 5511, "task_loss": 2.2558350563049316 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9985176655621721, "compression/movement_sparsity/importance_threshold": -1.0381877316494037e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9211969679798896, "compression/movement_sparsity/model_sparsity": 0.8895510181964325, "compression_loss": 105.58133697509766, "distillation_loss": 3.9297678470611572, "epoch": 4.66, "learning_rate": 2.9670329670329673e-05, "loss": 109.8766, "step": 5512, "task_loss": 1.3193987607955933 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9985286187278404, "compression/movement_sparsity/importance_threshold": -1.0305164248718308e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9211955847764438, "compression/movement_sparsity/model_sparsity": 0.8895496825102802, "compression_loss": 105.58186340332031, "distillation_loss": 5.221585273742676, "epoch": 4.66, "learning_rate": 2.9665633511787362e-05, "loss": 109.6087, "step": 5513, "task_loss": 3.1311895847320557 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9985395178039117, "compression/movement_sparsity/importance_threshold": -1.0228830010136289e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9212410754759747, "compression/movement_sparsity/model_sparsity": 0.8895936104643364, "compression_loss": 105.5824203491211, "distillation_loss": 4.5852508544921875, "epoch": 4.66, "learning_rate": 2.966093735324505e-05, "loss": 110.1361, "step": 5514, "task_loss": 1.674310564994812 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9985503629242715, "compression/movement_sparsity/importance_threshold": -1.0152873663050142e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9212778853814665, "compression/movement_sparsity/model_sparsity": 0.8896291558363342, "compression_loss": 105.58287048339844, "distillation_loss": 4.056064128875732, "epoch": 4.66, "learning_rate": 2.965624119470273e-05, "loss": 109.6557, "step": 5515, "task_loss": 2.0885801315307617 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9985611542228048, "compression/movement_sparsity/importance_threshold": -1.0077294269763767e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9212736880744588, "compression/movement_sparsity/model_sparsity": 0.8896251027197345, "compression_loss": 105.58338928222656, "distillation_loss": 3.532503366470337, "epoch": 4.66, "learning_rate": 2.965154503616042e-05, "loss": 109.4044, "step": 5516, "task_loss": 2.4017531871795654 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9985718918333966, "compression/movement_sparsity/importance_threshold": -1.0002090892582798e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9213254747345012, "compression/movement_sparsity/model_sparsity": 0.8896751103486902, "compression_loss": 105.58386993408203, "distillation_loss": 3.244553327560425, "epoch": 4.66, "learning_rate": 2.964684887761811e-05, "loss": 108.9161, "step": 5517, "task_loss": 2.21809458732605 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9985825758899322, "compression/movement_sparsity/importance_threshold": -9.927262593810264e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9213226129342686, "compression/movement_sparsity/model_sparsity": 0.8896723468600995, "compression_loss": 105.58434295654297, "distillation_loss": 4.375256061553955, "epoch": 4.66, "learning_rate": 2.96421527190758e-05, "loss": 109.0316, "step": 5518, "task_loss": 3.2320735454559326 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9985932065262967, "compression/movement_sparsity/importance_threshold": -9.852808435749198e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.921392285845765, "compression/movement_sparsity/model_sparsity": 0.889739626292747, "compression_loss": 105.58478546142578, "distillation_loss": 3.3518385887145996, "epoch": 4.66, "learning_rate": 2.9637456560533484e-05, "loss": 109.2038, "step": 5519, "task_loss": 2.1962645053863525 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9986037838763752, "compression/movement_sparsity/importance_threshold": -9.778727480706101e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9214746579957935, "compression/movement_sparsity/model_sparsity": 0.8898191687060157, "compression_loss": 105.58521270751953, "distillation_loss": 3.33717679977417, "epoch": 4.67, "learning_rate": 2.9632760401991173e-05, "loss": 109.577, "step": 5520, "task_loss": 1.2198176383972168 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9986143080740528, "compression/movement_sparsity/importance_threshold": -9.705018790982269e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9214571294693689, "compression/movement_sparsity/model_sparsity": 0.8898022423383978, "compression_loss": 105.58563232421875, "distillation_loss": 5.187948226928711, "epoch": 4.67, "learning_rate": 2.962806424344886e-05, "loss": 109.8002, "step": 5521, "task_loss": 1.6369948387145996 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9986247792532149, "compression/movement_sparsity/importance_threshold": -9.631681428882467e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9214765897109506, "compression/movement_sparsity/model_sparsity": 0.8898210340608144, "compression_loss": 105.58607482910156, "distillation_loss": 4.363204002380371, "epoch": 4.67, "learning_rate": 2.962336808490655e-05, "loss": 109.8458, "step": 5522, "task_loss": 2.6628644466400146 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9986351975477463, "compression/movement_sparsity/importance_threshold": -9.55871445671233e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9215207210553709, "compression/movement_sparsity/model_sparsity": 0.88986364935779, "compression_loss": 105.58650970458984, "distillation_loss": 4.051202774047852, "epoch": 4.67, "learning_rate": 2.9618671926364233e-05, "loss": 109.2223, "step": 5523, "task_loss": 2.1149940490722656 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9986455630915324, "compression/movement_sparsity/importance_threshold": -9.486116936774022e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9214801192645707, "compression/movement_sparsity/model_sparsity": 0.8898244423634096, "compression_loss": 105.58686828613281, "distillation_loss": 4.639266490936279, "epoch": 4.67, "learning_rate": 2.9613975767821922e-05, "loss": 110.0244, "step": 5524, "task_loss": 2.3971798419952393 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9986558760184582, "compression/movement_sparsity/importance_threshold": -9.413887931371441e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9215139242798185, "compression/movement_sparsity/model_sparsity": 0.8898570860723871, "compression_loss": 105.58724975585938, "distillation_loss": 4.142387390136719, "epoch": 4.67, "learning_rate": 2.9609279609279612e-05, "loss": 109.369, "step": 5525, "task_loss": 3.0739452838897705 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9986661364624089, "compression/movement_sparsity/importance_threshold": -9.34202650281022e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9216057045981116, "compression/movement_sparsity/model_sparsity": 0.8899457134543977, "compression_loss": 105.587646484375, "distillation_loss": 3.021927833557129, "epoch": 4.67, "learning_rate": 2.9604583450737298e-05, "loss": 109.323, "step": 5526, "task_loss": 1.3770098686218262 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9986763445572695, "compression/movement_sparsity/importance_threshold": -9.270531713392524e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9216087214125236, "compression/movement_sparsity/model_sparsity": 0.8899486266319537, "compression_loss": 105.58799743652344, "distillation_loss": 4.188009738922119, "epoch": 4.67, "learning_rate": 2.9599887292194985e-05, "loss": 110.2321, "step": 5527, "task_loss": 2.551593780517578 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9986865004369254, "compression/movement_sparsity/importance_threshold": -9.199402625423118e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9215627060496168, "compression/movement_sparsity/model_sparsity": 0.8899041920383226, "compression_loss": 105.58845520019531, "distillation_loss": 3.984070301055908, "epoch": 4.67, "learning_rate": 2.959519113365267e-05, "loss": 109.0129, "step": 5528, "task_loss": 3.040581703186035 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9986966042352615, "compression/movement_sparsity/importance_threshold": -9.12863830120677e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9216025327695205, "compression/movement_sparsity/model_sparsity": 0.8899426505878764, "compression_loss": 105.58878326416016, "distillation_loss": 3.0336554050445557, "epoch": 4.67, "learning_rate": 2.959049497511036e-05, "loss": 108.6966, "step": 5529, "task_loss": 1.4262202978134155 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9987066560861632, "compression/movement_sparsity/importance_threshold": -9.05823780304564e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9215813912203021, "compression/movement_sparsity/model_sparsity": 0.8899222353159126, "compression_loss": 105.58914184570312, "distillation_loss": 3.816352128982544, "epoch": 4.67, "learning_rate": 2.958579881656805e-05, "loss": 109.781, "step": 5530, "task_loss": 1.7347720861434937 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9987166561235153, "compression/movement_sparsity/importance_threshold": -8.988200193244499e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9215621694620731, "compression/movement_sparsity/model_sparsity": 0.8899036738842119, "compression_loss": 105.58946990966797, "distillation_loss": 4.625051021575928, "epoch": 4.67, "learning_rate": 2.958110265802574e-05, "loss": 109.7069, "step": 5531, "task_loss": 2.8517322540283203 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9987266044812032, "compression/movement_sparsity/importance_threshold": -8.918524534108109e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9216012449594159, "compression/movement_sparsity/model_sparsity": 0.8899414070180105, "compression_loss": 105.5898208618164, "distillation_loss": 4.2481160163879395, "epoch": 4.68, "learning_rate": 2.9576406499483423e-05, "loss": 109.6789, "step": 5532, "task_loss": 2.5806219577789307 }, { "compression/movement_sparsity/importance_regularization_factor": 0.998736501293112, "compression/movement_sparsity/importance_threshold": -8.849209887939503e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9216336071503796, "compression/movement_sparsity/model_sparsity": 0.8899726574681569, "compression_loss": 105.5901870727539, "distillation_loss": 4.160010814666748, "epoch": 4.68, "learning_rate": 2.957171034094111e-05, "loss": 109.6953, "step": 5533, "task_loss": 2.2042551040649414 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9987463466931267, "compression/movement_sparsity/importance_threshold": -8.780255317044315e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9216428006836268, "compression/movement_sparsity/model_sparsity": 0.8899815351752545, "compression_loss": 105.59048461914062, "distillation_loss": 3.8082284927368164, "epoch": 4.68, "learning_rate": 2.95670141823988e-05, "loss": 108.9387, "step": 5534, "task_loss": 2.353160858154297 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9987561408151326, "compression/movement_sparsity/importance_threshold": -8.71165988372384e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9216554522254885, "compression/movement_sparsity/model_sparsity": 0.8899937520977326, "compression_loss": 105.59085845947266, "distillation_loss": 3.0033059120178223, "epoch": 4.68, "learning_rate": 2.956231802385649e-05, "loss": 109.463, "step": 5535, "task_loss": 2.2943849563598633 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9987658837930148, "compression/movement_sparsity/importance_threshold": -8.643422650283714e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9216439573278875, "compression/movement_sparsity/model_sparsity": 0.8899826520852265, "compression_loss": 105.59125518798828, "distillation_loss": 4.7117719650268555, "epoch": 4.68, "learning_rate": 2.9557621865314172e-05, "loss": 110.2154, "step": 5536, "task_loss": 3.2189416885375977 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9987755757606583, "compression/movement_sparsity/importance_threshold": -8.5755426790287e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9216480473173866, "compression/movement_sparsity/model_sparsity": 0.8899866015710041, "compression_loss": 105.5915756225586, "distillation_loss": 5.699721336364746, "epoch": 4.68, "learning_rate": 2.955292570677186e-05, "loss": 109.3269, "step": 5537, "task_loss": 3.0598111152648926 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9987852168519485, "compression/movement_sparsity/importance_threshold": -8.508019032260096e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.921680647991703, "compression/movement_sparsity/model_sparsity": 0.8900180823118664, "compression_loss": 105.59193420410156, "distillation_loss": 3.4482791423797607, "epoch": 4.68, "learning_rate": 2.954822954822955e-05, "loss": 109.3828, "step": 5538, "task_loss": 1.66573965549469 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9987948072007704, "compression/movement_sparsity/importance_threshold": -8.440850772284403e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9216533535719845, "compression/movement_sparsity/model_sparsity": 0.8899917255394327, "compression_loss": 105.59231567382812, "distillation_loss": 3.452169895172119, "epoch": 4.68, "learning_rate": 2.9543533389687238e-05, "loss": 109.6048, "step": 5539, "task_loss": 2.3701376914978027 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9988043469410092, "compression/movement_sparsity/importance_threshold": -8.374036961404652e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.921644005024558, "compression/movement_sparsity/model_sparsity": 0.8899826981433697, "compression_loss": 105.5926284790039, "distillation_loss": 3.420638084411621, "epoch": 4.68, "learning_rate": 2.9538837231144924e-05, "loss": 109.3162, "step": 5540, "task_loss": 2.4548180103302 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9988138362065498, "compression/movement_sparsity/importance_threshold": -8.307576661924741e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.921613848804607, "compression/movement_sparsity/model_sparsity": 0.8899535778823454, "compression_loss": 105.593017578125, "distillation_loss": 3.7270278930664062, "epoch": 4.68, "learning_rate": 2.953414107260261e-05, "loss": 109.1725, "step": 5541, "task_loss": 2.056856393814087 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9988232751312777, "compression/movement_sparsity/importance_threshold": -8.241468936149438e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9215867690199059, "compression/movement_sparsity/model_sparsity": 0.8899274283715559, "compression_loss": 105.59333801269531, "distillation_loss": 4.259583950042725, "epoch": 4.68, "learning_rate": 2.95294449140603e-05, "loss": 109.4983, "step": 5542, "task_loss": 2.125389814376831 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9988326638490778, "compression/movement_sparsity/importance_threshold": -8.175712846381772e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9216080298108007, "compression/movement_sparsity/model_sparsity": 0.8899479587888777, "compression_loss": 105.59367370605469, "distillation_loss": 4.642014026641846, "epoch": 4.69, "learning_rate": 2.952474875551799e-05, "loss": 109.8015, "step": 5543, "task_loss": 2.4712116718292236 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9988420024938354, "compression/movement_sparsity/importance_threshold": -8.110307454924776e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9216659693413434, "compression/movement_sparsity/model_sparsity": 0.8900039079183033, "compression_loss": 105.59407043457031, "distillation_loss": 3.6531600952148438, "epoch": 4.69, "learning_rate": 2.952005259697568e-05, "loss": 109.3419, "step": 5544, "task_loss": 1.648470401763916 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9988512911994354, "compression/movement_sparsity/importance_threshold": -8.04525182408495e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9216930968227149, "compression/movement_sparsity/model_sparsity": 0.8900301034872359, "compression_loss": 105.59442138671875, "distillation_loss": 2.666248321533203, "epoch": 4.69, "learning_rate": 2.9515356438433362e-05, "loss": 109.2488, "step": 5545, "task_loss": 1.1622225046157837 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9988605300997632, "compression/movement_sparsity/importance_threshold": -7.980545016164457e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9217111977091862, "compression/movement_sparsity/model_sparsity": 0.890047582552572, "compression_loss": 105.59477233886719, "distillation_loss": 4.48745059967041, "epoch": 4.69, "learning_rate": 2.951066027989105e-05, "loss": 109.6328, "step": 5546, "task_loss": 3.091247081756592 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9988697193287037, "compression/movement_sparsity/importance_threshold": -7.916186093468065e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9217097191123993, "compression/movement_sparsity/model_sparsity": 0.8900461547501335, "compression_loss": 105.59513854980469, "distillation_loss": 4.150511264801025, "epoch": 4.69, "learning_rate": 2.950596412134874e-05, "loss": 109.844, "step": 5547, "task_loss": 3.1403212547302246 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9988788590201423, "compression/movement_sparsity/importance_threshold": -7.852174118299672e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9216973418263933, "compression/movement_sparsity/model_sparsity": 0.8900342026619787, "compression_loss": 105.59547424316406, "distillation_loss": 3.0130043029785156, "epoch": 4.69, "learning_rate": 2.9501267962806428e-05, "loss": 109.4778, "step": 5548, "task_loss": 1.7208449840545654 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9988879493079639, "compression/movement_sparsity/importance_threshold": -7.788508152964044e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.921714345689442, "compression/movement_sparsity/model_sparsity": 0.8900506223900218, "compression_loss": 105.59584045410156, "distillation_loss": 3.5317206382751465, "epoch": 4.69, "learning_rate": 2.949657180426411e-05, "loss": 109.9433, "step": 5549, "task_loss": 1.816392183303833 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9988969903260538, "compression/movement_sparsity/importance_threshold": -7.725187259762477e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9217758863186107, "compression/movement_sparsity/model_sparsity": 0.8901100489092574, "compression_loss": 105.59619903564453, "distillation_loss": 5.341182231903076, "epoch": 4.69, "learning_rate": 2.94918756457218e-05, "loss": 109.7352, "step": 5550, "task_loss": 2.734313488006592 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9989059822082972, "compression/movement_sparsity/importance_threshold": -7.662210501001472e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9217829692741865, "compression/movement_sparsity/model_sparsity": 0.8901168885435193, "compression_loss": 105.59648895263672, "distillation_loss": 4.024999618530273, "epoch": 4.69, "learning_rate": 2.948717948717949e-05, "loss": 108.9484, "step": 5551, "task_loss": 2.456216812133789 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9989149250885789, "compression/movement_sparsity/importance_threshold": -7.599576938984061e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9217955611952099, "compression/movement_sparsity/model_sparsity": 0.8901290478933184, "compression_loss": 105.5968246459961, "distillation_loss": 3.687828540802002, "epoch": 4.69, "learning_rate": 2.9482483328637177e-05, "loss": 109.1914, "step": 5552, "task_loss": 2.942133903503418 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9989238191007845, "compression/movement_sparsity/importance_threshold": -7.537285636014142e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9217559133378206, "compression/movement_sparsity/model_sparsity": 0.8900907620618015, "compression_loss": 105.59710693359375, "distillation_loss": 4.101009368896484, "epoch": 4.69, "learning_rate": 2.947778717009486e-05, "loss": 109.3429, "step": 5553, "task_loss": 2.2548351287841797 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9989326643787988, "compression/movement_sparsity/importance_threshold": -7.475335654396481e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9217146795661358, "compression/movement_sparsity/model_sparsity": 0.890050944797024, "compression_loss": 105.5974349975586, "distillation_loss": 4.991859436035156, "epoch": 4.69, "learning_rate": 2.947309101155255e-05, "loss": 110.481, "step": 5554, "task_loss": 2.8656604290008545 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9989414610565072, "compression/movement_sparsity/importance_threshold": -7.41372605643411e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.921726722975448, "compression/movement_sparsity/model_sparsity": 0.8900625744781765, "compression_loss": 105.5978012084961, "distillation_loss": 3.668736219406128, "epoch": 4.7, "learning_rate": 2.946839485301024e-05, "loss": 110.1701, "step": 5555, "task_loss": 1.8532589673995972 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9989502092677945, "compression/movement_sparsity/importance_threshold": -7.352455904430927e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9217874050645469, "compression/movement_sparsity/model_sparsity": 0.890121171950835, "compression_loss": 105.59812927246094, "distillation_loss": 3.6224679946899414, "epoch": 4.7, "learning_rate": 2.946369869446793e-05, "loss": 109.897, "step": 5556, "task_loss": 1.8085417747497559 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9989589091465461, "compression/movement_sparsity/importance_threshold": -7.291524260693433e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9218594389612352, "compression/movement_sparsity/model_sparsity": 0.8901907312615698, "compression_loss": 105.59845733642578, "distillation_loss": 4.960972785949707, "epoch": 4.7, "learning_rate": 2.9459002535925612e-05, "loss": 109.9854, "step": 5557, "task_loss": 3.8202719688415527 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9989675608266471, "compression/movement_sparsity/importance_threshold": -7.230930187522057e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.921879125762002, "compression/movement_sparsity/model_sparsity": 0.8902097417601665, "compression_loss": 105.59879302978516, "distillation_loss": 3.8254401683807373, "epoch": 4.7, "learning_rate": 2.9454306377383302e-05, "loss": 109.4549, "step": 5558, "task_loss": 1.8766640424728394 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9989761644419826, "compression/movement_sparsity/importance_threshold": -7.170672747222433e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9219141351181809, "compression/movement_sparsity/model_sparsity": 0.8902435484372593, "compression_loss": 105.59912109375, "distillation_loss": 5.655785083770752, "epoch": 4.7, "learning_rate": 2.9449610218840988e-05, "loss": 109.5628, "step": 5559, "task_loss": 3.0628976821899414 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9989847201264378, "compression/movement_sparsity/importance_threshold": -7.110751002097591e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9218850401491494, "compression/movement_sparsity/model_sparsity": 0.8902154529699207, "compression_loss": 105.59945678710938, "distillation_loss": 4.490151405334473, "epoch": 4.7, "learning_rate": 2.9444914060298678e-05, "loss": 109.6885, "step": 5560, "task_loss": 3.1283621788024902 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9989932280138978, "compression/movement_sparsity/importance_threshold": -7.0511640144531665e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9218889035794634, "compression/movement_sparsity/model_sparsity": 0.890219183679518, "compression_loss": 105.59978485107422, "distillation_loss": 3.3454227447509766, "epoch": 4.7, "learning_rate": 2.9440217901756368e-05, "loss": 109.3777, "step": 5561, "task_loss": 1.4451587200164795 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9990016882382476, "compression/movement_sparsity/importance_threshold": -6.991910846592189e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9219447683048374, "compression/movement_sparsity/model_sparsity": 0.8902731292797155, "compression_loss": 105.6000747680664, "distillation_loss": 3.354673147201538, "epoch": 4.7, "learning_rate": 2.943552174321405e-05, "loss": 109.6329, "step": 5562, "task_loss": 2.4315085411071777 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9990101009333726, "compression/movement_sparsity/importance_threshold": -6.932990560818558e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9219354078332432, "compression/movement_sparsity/model_sparsity": 0.8902640903691168, "compression_loss": 105.60040283203125, "distillation_loss": 4.514801502227783, "epoch": 4.7, "learning_rate": 2.943082558467174e-05, "loss": 109.8817, "step": 5563, "task_loss": 2.743793249130249 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9990184662331578, "compression/movement_sparsity/importance_threshold": -6.87440221943704e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.92196210604458, "compression/movement_sparsity/model_sparsity": 0.8902898714147607, "compression_loss": 105.60069274902344, "distillation_loss": 5.476081848144531, "epoch": 4.7, "learning_rate": 2.942612942612943e-05, "loss": 110.2584, "step": 5564, "task_loss": 2.48602032661438 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9990267842714884, "compression/movement_sparsity/importance_threshold": -6.816144884750665e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9219440170822764, "compression/movement_sparsity/model_sparsity": 0.8902724038639604, "compression_loss": 105.60093688964844, "distillation_loss": 4.484657287597656, "epoch": 4.7, "learning_rate": 2.9421433267587116e-05, "loss": 110.5321, "step": 5565, "task_loss": 2.4490067958831787 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9990350551822496, "compression/movement_sparsity/importance_threshold": -6.758217619062465e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9219091388919415, "compression/movement_sparsity/model_sparsity": 0.8902387238467614, "compression_loss": 105.601318359375, "distillation_loss": 5.056672096252441, "epoch": 4.7, "learning_rate": 2.94167371090448e-05, "loss": 110.1083, "step": 5566, "task_loss": 2.8924527168273926 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9990432790993262, "compression/movement_sparsity/importance_threshold": -6.700619484679808e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9219169253734077, "compression/movement_sparsity/model_sparsity": 0.8902462428386352, "compression_loss": 105.6015853881836, "distillation_loss": 4.894302845001221, "epoch": 4.71, "learning_rate": 2.941204095050249e-05, "loss": 109.9424, "step": 5567, "task_loss": 1.9469314813613892 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9990514561566037, "compression/movement_sparsity/importance_threshold": -6.643349543903124e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.922036012035587, "compression/movement_sparsity/model_sparsity": 0.8903612385076153, "compression_loss": 105.60186767578125, "distillation_loss": 3.305497646331787, "epoch": 4.71, "learning_rate": 2.940734479196018e-05, "loss": 109.741, "step": 5568, "task_loss": 2.193976402282715 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9990595864879671, "compression/movement_sparsity/importance_threshold": -6.5864068590389124e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9219974850499556, "compression/movement_sparsity/model_sparsity": 0.8903240350424632, "compression_loss": 105.60218811035156, "distillation_loss": 4.102393627166748, "epoch": 4.71, "learning_rate": 2.940264863341787e-05, "loss": 109.6391, "step": 5569, "task_loss": 2.1778347492218018 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9990676702273016, "compression/movement_sparsity/importance_threshold": -6.529790492389338e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9219882199717025, "compression/movement_sparsity/model_sparsity": 0.8903150882481508, "compression_loss": 105.60255432128906, "distillation_loss": 5.236992835998535, "epoch": 4.71, "learning_rate": 2.939795247487555e-05, "loss": 109.8669, "step": 5570, "task_loss": 3.277637481689453 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9990757075084923, "compression/movement_sparsity/importance_threshold": -6.473499506259167e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9220859385254784, "compression/movement_sparsity/model_sparsity": 0.8904094498689871, "compression_loss": 105.602783203125, "distillation_loss": 5.6884765625, "epoch": 4.71, "learning_rate": 2.939325631633324e-05, "loss": 109.9738, "step": 5571, "task_loss": 3.4811792373657227 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9990836984654243, "compression/movement_sparsity/importance_threshold": -6.417532962952298e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.92210802208394, "compression/movement_sparsity/model_sparsity": 0.8904307747892786, "compression_loss": 105.60305786132812, "distillation_loss": 6.012972831726074, "epoch": 4.71, "learning_rate": 2.9388560157790928e-05, "loss": 109.6622, "step": 5572, "task_loss": 3.3200559616088867 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9990916432319829, "compression/movement_sparsity/importance_threshold": -6.361889924772629e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9221484927088962, "compression/movement_sparsity/model_sparsity": 0.8904698551237652, "compression_loss": 105.60332489013672, "distillation_loss": 5.538825988769531, "epoch": 4.71, "learning_rate": 2.9383863999248617e-05, "loss": 110.2575, "step": 5573, "task_loss": 3.4202160835266113 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9990995419420531, "compression/movement_sparsity/importance_threshold": -6.306569454024059e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.922173974655134, "compression/movement_sparsity/model_sparsity": 0.8904944616867582, "compression_loss": 105.6036376953125, "distillation_loss": 3.3037567138671875, "epoch": 4.71, "learning_rate": 2.93791678407063e-05, "loss": 109.9624, "step": 5574, "task_loss": 1.8733536005020142 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9991073947295199, "compression/movement_sparsity/importance_threshold": -6.251570613011355e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9221768483795343, "compression/movement_sparsity/model_sparsity": 0.8904972366898846, "compression_loss": 105.60387420654297, "distillation_loss": 3.0969977378845215, "epoch": 4.71, "learning_rate": 2.937447168216399e-05, "loss": 108.7325, "step": 5575, "task_loss": 1.4023114442825317 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9991152017282688, "compression/movement_sparsity/importance_threshold": -6.196892464037547e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9222409765530799, "compression/movement_sparsity/model_sparsity": 0.8905591618633877, "compression_loss": 105.60407257080078, "distillation_loss": 3.2210378646850586, "epoch": 4.71, "learning_rate": 2.936977552362168e-05, "loss": 109.6591, "step": 5576, "task_loss": 1.3083778619766235 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9991229630721846, "compression/movement_sparsity/importance_threshold": -6.1425340694074015e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9222008040323146, "compression/movement_sparsity/model_sparsity": 0.8905203693922958, "compression_loss": 105.60431671142578, "distillation_loss": 4.883753299713135, "epoch": 4.71, "learning_rate": 2.9365079365079366e-05, "loss": 109.7846, "step": 5577, "task_loss": 2.3021059036254883 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9991306788951527, "compression/movement_sparsity/importance_threshold": -6.08849449142395e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9222622492681423, "compression/movement_sparsity/model_sparsity": 0.8905797037952451, "compression_loss": 105.60458374023438, "distillation_loss": 3.5930442810058594, "epoch": 4.71, "learning_rate": 2.9360383206537056e-05, "loss": 109.3932, "step": 5578, "task_loss": 1.856186866760254 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9991383493310582, "compression/movement_sparsity/importance_threshold": -6.034772792391958e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9223005735429239, "compression/movement_sparsity/model_sparsity": 0.8906167115132888, "compression_loss": 105.6048583984375, "distillation_loss": 4.280333518981934, "epoch": 4.72, "learning_rate": 2.935568704799474e-05, "loss": 109.6598, "step": 5579, "task_loss": 2.888791799545288 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9991459745137861, "compression/movement_sparsity/importance_threshold": -5.981368034614458e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9222862764659285, "compression/movement_sparsity/model_sparsity": 0.8906029055848711, "compression_loss": 105.60505676269531, "distillation_loss": 4.122594833374023, "epoch": 4.72, "learning_rate": 2.935099088945243e-05, "loss": 109.5739, "step": 5580, "task_loss": 2.5315141677856445 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9991535545772214, "compression/movement_sparsity/importance_threshold": -5.9282792803979495e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9222529484173864, "compression/movement_sparsity/model_sparsity": 0.8905707224573254, "compression_loss": 105.60533142089844, "distillation_loss": 5.100676536560059, "epoch": 4.72, "learning_rate": 2.9346294730910118e-05, "loss": 109.7613, "step": 5581, "task_loss": 3.0022695064544678 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9991610896552497, "compression/movement_sparsity/importance_threshold": -5.875505592042862e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9222779176244158, "compression/movement_sparsity/model_sparsity": 0.8905948338952792, "compression_loss": 105.6055679321289, "distillation_loss": 4.076694965362549, "epoch": 4.72, "learning_rate": 2.9341598572367808e-05, "loss": 109.3416, "step": 5582, "task_loss": 3.1584131717681885 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9991685798817559, "compression/movement_sparsity/importance_threshold": -5.82304603185483e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9223399471444576, "compression/movement_sparsity/model_sparsity": 0.8906547325104823, "compression_loss": 105.60575103759766, "distillation_loss": 5.687671661376953, "epoch": 4.72, "learning_rate": 2.933690241382549e-05, "loss": 110.1453, "step": 5583, "task_loss": 4.006703853607178 }, { "compression/movement_sparsity/importance_regularization_factor": 0.999176025390625, "compression/movement_sparsity/importance_threshold": -5.770899662138618e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9223615298878785, "compression/movement_sparsity/model_sparsity": 0.8906755738202705, "compression_loss": 105.6059341430664, "distillation_loss": 4.237778186798096, "epoch": 4.72, "learning_rate": 2.933220625528318e-05, "loss": 109.5858, "step": 5584, "task_loss": 4.231777191162109 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9991834263157423, "compression/movement_sparsity/importance_threshold": -5.719065545197258e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9223449195223618, "compression/movement_sparsity/model_sparsity": 0.8906595340719087, "compression_loss": 105.60612487792969, "distillation_loss": 5.169844627380371, "epoch": 4.72, "learning_rate": 2.9327510096740867e-05, "loss": 109.8318, "step": 5585, "task_loss": 4.575788497924805 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9991907827909929, "compression/movement_sparsity/importance_threshold": -5.667542743334648e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9223528729421748, "compression/movement_sparsity/model_sparsity": 0.8906672142672837, "compression_loss": 105.60625457763672, "distillation_loss": 4.209450721740723, "epoch": 4.72, "learning_rate": 2.9322813938198557e-05, "loss": 110.5658, "step": 5586, "task_loss": 2.2490382194519043 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9991980949502621, "compression/movement_sparsity/importance_threshold": -5.616330318854688e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9223205703720493, "compression/movement_sparsity/model_sparsity": 0.8906360213898162, "compression_loss": 105.60641479492188, "distillation_loss": 4.804665565490723, "epoch": 4.72, "learning_rate": 2.931811777965624e-05, "loss": 109.2357, "step": 5587, "task_loss": 2.4421379566192627 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9992053629274348, "compression/movement_sparsity/importance_threshold": -5.565427334063011e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9222619988606219, "compression/movement_sparsity/model_sparsity": 0.8905794619899934, "compression_loss": 105.60655975341797, "distillation_loss": 3.900477886199951, "epoch": 4.72, "learning_rate": 2.931342162111393e-05, "loss": 109.8548, "step": 5588, "task_loss": 2.1990997791290283 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9992125868563962, "compression/movement_sparsity/importance_threshold": -5.514832851260913e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9222659696084446, "compression/movement_sparsity/model_sparsity": 0.890583296330413, "compression_loss": 105.60664367675781, "distillation_loss": 4.9571123123168945, "epoch": 4.72, "learning_rate": 2.930872546257162e-05, "loss": 109.2734, "step": 5589, "task_loss": 2.4132933616638184 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9992197668710315, "compression/movement_sparsity/importance_threshold": -5.464545932754895e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9222598167379445, "compression/movement_sparsity/model_sparsity": 0.890577354829943, "compression_loss": 105.60681915283203, "distillation_loss": 3.153958320617676, "epoch": 4.72, "learning_rate": 2.9304029304029305e-05, "loss": 109.3255, "step": 5590, "task_loss": 1.3000088930130005 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9992269031052259, "compression/movement_sparsity/importance_threshold": -5.4145656408471216e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9222596497995976, "compression/movement_sparsity/model_sparsity": 0.8905771936264419, "compression_loss": 105.60693359375, "distillation_loss": 3.181910991668701, "epoch": 4.73, "learning_rate": 2.9299333145486995e-05, "loss": 109.0583, "step": 5591, "task_loss": 1.8961840867996216 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9992339956928643, "compression/movement_sparsity/importance_threshold": -5.364891037843225e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9222647056466753, "compression/movement_sparsity/model_sparsity": 0.8905820757896188, "compression_loss": 105.60704803466797, "distillation_loss": 4.400503158569336, "epoch": 4.73, "learning_rate": 2.9294636986944678e-05, "loss": 109.7531, "step": 5592, "task_loss": 2.0076029300689697 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9992410447678322, "compression/movement_sparsity/importance_threshold": -5.315521186044503e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9222514340480965, "compression/movement_sparsity/model_sparsity": 0.8905692601112795, "compression_loss": 105.60713195800781, "distillation_loss": 5.280132293701172, "epoch": 4.73, "learning_rate": 2.9289940828402368e-05, "loss": 109.7759, "step": 5593, "task_loss": 2.8329017162323 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9992480504640143, "compression/movement_sparsity/importance_threshold": -5.266455147758323e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9222018891315695, "compression/movement_sparsity/model_sparsity": 0.8905214172150532, "compression_loss": 105.60722351074219, "distillation_loss": 3.7911579608917236, "epoch": 4.73, "learning_rate": 2.9285244669860057e-05, "loss": 109.4872, "step": 5594, "task_loss": 2.000415802001953 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9992550129152962, "compression/movement_sparsity/importance_threshold": -5.217691985285981e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9221883671254705, "compression/movement_sparsity/model_sparsity": 0.8905083597314621, "compression_loss": 105.60737609863281, "distillation_loss": 4.422634124755859, "epoch": 4.73, "learning_rate": 2.9280548511317747e-05, "loss": 109.5805, "step": 5595, "task_loss": 2.5590217113494873 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9992619322555627, "compression/movement_sparsity/importance_threshold": -5.169230760932245e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9222048105526404, "compression/movement_sparsity/model_sparsity": 0.8905242382763228, "compression_loss": 105.60743713378906, "distillation_loss": 3.7626872062683105, "epoch": 4.73, "learning_rate": 2.927585235277543e-05, "loss": 109.7688, "step": 5596, "task_loss": 2.32246732711792 }, { "compression/movement_sparsity/importance_regularization_factor": 0.999268808618699, "compression/movement_sparsity/importance_threshold": -5.121070537002746e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9221765264270081, "compression/movement_sparsity/model_sparsity": 0.8904969257974181, "compression_loss": 105.6075668334961, "distillation_loss": 3.9103686809539795, "epoch": 4.73, "learning_rate": 2.9271156194233117e-05, "loss": 110.2285, "step": 5597, "task_loss": 2.6917808055877686 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9992756421385904, "compression/movement_sparsity/importance_threshold": -5.073210375798783e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9222107010914524, "compression/movement_sparsity/model_sparsity": 0.8905299264570053, "compression_loss": 105.60763549804688, "distillation_loss": 3.9632201194763184, "epoch": 4.73, "learning_rate": 2.9266460035690806e-05, "loss": 109.969, "step": 5598, "task_loss": 3.065769910812378 }, { "compression/movement_sparsity/importance_regularization_factor": 0.999282432949122, "compression/movement_sparsity/importance_threshold": -5.02564933962512e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9221763594886612, "compression/movement_sparsity/model_sparsity": 0.890496764593917, "compression_loss": 105.60772705078125, "distillation_loss": 4.642152786254883, "epoch": 4.73, "learning_rate": 2.9261763877148496e-05, "loss": 110.5089, "step": 5599, "task_loss": 2.245070219039917 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9992891811841788, "compression/movement_sparsity/importance_threshold": -4.978386490787391e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9221357219253582, "compression/movement_sparsity/model_sparsity": 0.8904575230559293, "compression_loss": 105.60783386230469, "distillation_loss": 4.128574848175049, "epoch": 4.73, "learning_rate": 2.925706771860618e-05, "loss": 110.2386, "step": 5600, "task_loss": 2.39890193939209 }, { "compression/movement_sparsity/importance_regularization_factor": 0.999295886977646, "compression/movement_sparsity/importance_threshold": -4.931420891587761e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9222088886179718, "compression/movement_sparsity/model_sparsity": 0.8905281762475645, "compression_loss": 105.60791015625, "distillation_loss": 3.3879923820495605, "epoch": 4.73, "learning_rate": 2.925237156006387e-05, "loss": 109.5162, "step": 5601, "task_loss": 3.122440814971924 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9993025504634088, "compression/movement_sparsity/importance_threshold": -4.884751604331862e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9222581115819727, "compression/movement_sparsity/model_sparsity": 0.8905757082513244, "compression_loss": 105.60799407958984, "distillation_loss": 5.597792625427246, "epoch": 4.73, "learning_rate": 2.924767540152156e-05, "loss": 110.2009, "step": 5602, "task_loss": 2.21575665473938 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9993091717753523, "compression/movement_sparsity/importance_threshold": -4.838377691321859e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9222598286621122, "compression/movement_sparsity/model_sparsity": 0.8905773663444788, "compression_loss": 105.6080551147461, "distillation_loss": 4.800532341003418, "epoch": 4.74, "learning_rate": 2.9242979242979245e-05, "loss": 110.6373, "step": 5603, "task_loss": 1.7648063898086548 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9993157510473617, "compression/movement_sparsity/importance_threshold": -4.79229821486165e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.922251255185582, "compression/movement_sparsity/model_sparsity": 0.8905690873932426, "compression_loss": 105.6081314086914, "distillation_loss": 4.228703022003174, "epoch": 4.74, "learning_rate": 2.9238283084436928e-05, "loss": 110.0539, "step": 5604, "task_loss": 1.912705898284912 }, { "compression/movement_sparsity/importance_regularization_factor": 0.999322288413322, "compression/movement_sparsity/importance_threshold": -4.746512237256868e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9222495023329396, "compression/movement_sparsity/model_sparsity": 0.8905673947564807, "compression_loss": 105.60822296142578, "distillation_loss": 3.682400703430176, "epoch": 4.74, "learning_rate": 2.9233586925894617e-05, "loss": 109.3762, "step": 5605, "task_loss": 2.4366307258605957 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9993287840071184, "compression/movement_sparsity/importance_threshold": -4.701018820810546e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9222358491609964, "compression/movement_sparsity/model_sparsity": 0.890554210612996, "compression_loss": 105.60833740234375, "distillation_loss": 5.052546977996826, "epoch": 4.74, "learning_rate": 2.9228890767352307e-05, "loss": 110.0106, "step": 5606, "task_loss": 3.261329174041748 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9993352379626362, "compression/movement_sparsity/importance_threshold": -4.655817027826581e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9222561560184803, "compression/movement_sparsity/model_sparsity": 0.8905738198674541, "compression_loss": 105.60840606689453, "distillation_loss": 4.146862983703613, "epoch": 4.74, "learning_rate": 2.9224194608809997e-05, "loss": 109.8905, "step": 5607, "task_loss": 2.6811201572418213 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9993416504137603, "compression/movement_sparsity/importance_threshold": -4.61090592060974e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.922283772390725, "compression/movement_sparsity/model_sparsity": 0.8906004875323542, "compression_loss": 105.60856628417969, "distillation_loss": 4.35052490234375, "epoch": 4.74, "learning_rate": 2.9219498450267687e-05, "loss": 109.8753, "step": 5608, "task_loss": 2.6895864009857178 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9993480214943761, "compression/movement_sparsity/importance_threshold": -4.5662845614630534e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9222717051330775, "compression/movement_sparsity/model_sparsity": 0.8905888348221301, "compression_loss": 105.60872650146484, "distillation_loss": 3.369302272796631, "epoch": 4.74, "learning_rate": 2.921480229172537e-05, "loss": 109.3214, "step": 5609, "task_loss": 1.4828367233276367 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9993543513383685, "compression/movement_sparsity/importance_threshold": -4.521952012691288e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9222965550984307, "compression/movement_sparsity/model_sparsity": 0.890612831114726, "compression_loss": 105.60883331298828, "distillation_loss": 3.7170021533966064, "epoch": 4.74, "learning_rate": 2.9210106133183056e-05, "loss": 109.8172, "step": 5610, "task_loss": 2.8559083938598633 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9993606400796227, "compression/movement_sparsity/importance_threshold": -4.477907336597475e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.922320021860338, "compression/movement_sparsity/model_sparsity": 0.8906354917211696, "compression_loss": 105.60894775390625, "distillation_loss": 4.377997398376465, "epoch": 4.74, "learning_rate": 2.9205409974640746e-05, "loss": 109.2672, "step": 5611, "task_loss": 2.5065746307373047 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9993668878520238, "compression/movement_sparsity/importance_threshold": -4.434149595487248e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9223383850784973, "compression/movement_sparsity/model_sparsity": 0.8906532241062932, "compression_loss": 105.6090316772461, "distillation_loss": 4.252251625061035, "epoch": 4.74, "learning_rate": 2.9200713816098435e-05, "loss": 109.5054, "step": 5612, "task_loss": 2.5108642578125 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9993730947894571, "compression/movement_sparsity/importance_threshold": -4.390677851661903e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9223015632488377, "compression/movement_sparsity/model_sparsity": 0.8906176672197598, "compression_loss": 105.6091537475586, "distillation_loss": 4.167950630187988, "epoch": 4.74, "learning_rate": 2.9196017657556118e-05, "loss": 109.0515, "step": 5613, "task_loss": 2.6121675968170166 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9993792610258077, "compression/movement_sparsity/importance_threshold": -4.3474911674279415e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9222815544955447, "compression/movement_sparsity/model_sparsity": 0.8905983458286965, "compression_loss": 105.6092300415039, "distillation_loss": 4.750764846801758, "epoch": 4.75, "learning_rate": 2.9191321499013808e-05, "loss": 110.1734, "step": 5614, "task_loss": 2.9575939178466797 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9993853866949606, "compression/movement_sparsity/importance_threshold": -4.304588605088394e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9223010505096294, "compression/movement_sparsity/model_sparsity": 0.8906171720947206, "compression_loss": 105.6092300415039, "distillation_loss": 5.412894248962402, "epoch": 4.75, "learning_rate": 2.9186625340471498e-05, "loss": 110.1121, "step": 5615, "task_loss": 3.969921827316284 }, { "compression/movement_sparsity/importance_regularization_factor": 0.999391471930801, "compression/movement_sparsity/importance_threshold": -4.26196922694716e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9223165280792207, "compression/movement_sparsity/model_sparsity": 0.8906321179621819, "compression_loss": 105.60932159423828, "distillation_loss": 4.211411952972412, "epoch": 4.75, "learning_rate": 2.9181929181929184e-05, "loss": 109.91, "step": 5616, "task_loss": 3.04245662689209 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9993975168672142, "compression/movement_sparsity/importance_threshold": -4.219632095308137e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9222415250647912, "compression/movement_sparsity/model_sparsity": 0.8905596915320342, "compression_loss": 105.609375, "distillation_loss": 3.9418325424194336, "epoch": 4.75, "learning_rate": 2.9177233023386867e-05, "loss": 109.8852, "step": 5617, "task_loss": 1.845381736755371 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9994035216380851, "compression/movement_sparsity/importance_threshold": -4.177576272476093e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9222629050973623, "compression/movement_sparsity/model_sparsity": 0.8905803370947138, "compression_loss": 105.60942077636719, "distillation_loss": 4.406393051147461, "epoch": 4.75, "learning_rate": 2.9172536864844557e-05, "loss": 109.5133, "step": 5618, "task_loss": 2.2049105167388916 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9994094863772991, "compression/movement_sparsity/importance_threshold": -4.13580082075319e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9222959469658812, "compression/movement_sparsity/model_sparsity": 0.8906122438734004, "compression_loss": 105.60942840576172, "distillation_loss": 4.3336358070373535, "epoch": 4.75, "learning_rate": 2.9167840706302246e-05, "loss": 110.2571, "step": 5619, "task_loss": 3.0254037380218506 }, { "compression/movement_sparsity/importance_regularization_factor": 0.999415411218741, "compression/movement_sparsity/importance_threshold": -4.094304802445929e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9223347243590331, "compression/movement_sparsity/model_sparsity": 0.8906496891438044, "compression_loss": 105.60943603515625, "distillation_loss": 3.093466281890869, "epoch": 4.75, "learning_rate": 2.9163144547759936e-05, "loss": 109.2726, "step": 5620, "task_loss": 1.5833241939544678 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9994212962962963, "compression/movement_sparsity/importance_threshold": -4.053087279856475e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9223158484016655, "compression/movement_sparsity/model_sparsity": 0.8906314616336416, "compression_loss": 105.60943603515625, "distillation_loss": 4.237001895904541, "epoch": 4.75, "learning_rate": 2.9158448389217623e-05, "loss": 109.4637, "step": 5621, "task_loss": 1.3900835514068604 }, { "compression/movement_sparsity/importance_regularization_factor": 0.99942714174385, "compression/movement_sparsity/importance_threshold": -4.012147315288726e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9223312186537481, "compression/movement_sparsity/model_sparsity": 0.8906463038702808, "compression_loss": 105.60940551757812, "distillation_loss": 3.6893773078918457, "epoch": 4.75, "learning_rate": 2.915375223067531e-05, "loss": 109.5022, "step": 5622, "task_loss": 1.9447572231292725 }, { "compression/movement_sparsity/importance_regularization_factor": 0.999432947695287, "compression/movement_sparsity/importance_threshold": -3.971483971047449e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9223598247319066, "compression/movement_sparsity/model_sparsity": 0.8906739272416518, "compression_loss": 105.60939025878906, "distillation_loss": 3.966864585876465, "epoch": 4.75, "learning_rate": 2.9149056072132995e-05, "loss": 109.0791, "step": 5623, "task_loss": 1.7631661891937256 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9994387142844928, "compression/movement_sparsity/importance_threshold": -3.931096309436541e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9224275778524136, "compression/movement_sparsity/model_sparsity": 0.8907393528340364, "compression_loss": 105.60933685302734, "distillation_loss": 2.955288887023926, "epoch": 4.75, "learning_rate": 2.9144359913590685e-05, "loss": 109.4623, "step": 5624, "task_loss": 1.7654914855957031 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9994444416453524, "compression/movement_sparsity/importance_threshold": -3.890983392759902e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9224232016828912, "compression/movement_sparsity/model_sparsity": 0.8907351269993998, "compression_loss": 105.60929870605469, "distillation_loss": 5.397034168243408, "epoch": 4.75, "learning_rate": 2.9139663755048375e-05, "loss": 109.9182, "step": 5625, "task_loss": 3.168004035949707 }, { "compression/movement_sparsity/importance_regularization_factor": 0.999450129911751, "compression/movement_sparsity/importance_threshold": -3.85114428332143e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9223894085918112, "compression/movement_sparsity/model_sparsity": 0.8907024948049581, "compression_loss": 105.60926055908203, "distillation_loss": 4.069596290588379, "epoch": 4.76, "learning_rate": 2.9134967596506058e-05, "loss": 109.1805, "step": 5626, "task_loss": 1.802577018737793 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9994557792175736, "compression/movement_sparsity/importance_threshold": -3.8115780434250235e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9223828741479467, "compression/movement_sparsity/model_sparsity": 0.8906961848393427, "compression_loss": 105.60919952392578, "distillation_loss": 3.475609540939331, "epoch": 4.76, "learning_rate": 2.9130271437963747e-05, "loss": 108.8786, "step": 5627, "task_loss": 2.124619722366333 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9994613896967054, "compression/movement_sparsity/importance_threshold": -3.772283735374582e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9223769716849669, "compression/movement_sparsity/model_sparsity": 0.8906904851441244, "compression_loss": 105.60908508300781, "distillation_loss": 4.698302745819092, "epoch": 4.76, "learning_rate": 2.9125575279421434e-05, "loss": 109.8252, "step": 5628, "task_loss": 2.3554258346557617 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9994669614830316, "compression/movement_sparsity/importance_threshold": -3.733260421474871e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9223554724107195, "compression/movement_sparsity/model_sparsity": 0.8906697244360868, "compression_loss": 105.60902404785156, "distillation_loss": 3.1704049110412598, "epoch": 4.76, "learning_rate": 2.9120879120879123e-05, "loss": 109.6902, "step": 5629, "task_loss": 2.841761350631714 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9994724947104373, "compression/movement_sparsity/importance_threshold": -3.694507164028922e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.922418360470831, "compression/movement_sparsity/model_sparsity": 0.8907304520978673, "compression_loss": 105.60891723632812, "distillation_loss": 5.021284580230713, "epoch": 4.76, "learning_rate": 2.9116182962336806e-05, "loss": 109.8644, "step": 5630, "task_loss": 3.831285238265991 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9994779895128076, "compression/movement_sparsity/importance_threshold": -3.6560230253406337e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9224172038265703, "compression/movement_sparsity/model_sparsity": 0.8907293351878952, "compression_loss": 105.60879516601562, "distillation_loss": 3.7207274436950684, "epoch": 4.76, "learning_rate": 2.9111486803794496e-05, "loss": 109.5091, "step": 5631, "task_loss": 1.6434932947158813 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9994834460240277, "compression/movement_sparsity/importance_threshold": -3.6178070677156393e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9224530836469866, "compression/movement_sparsity/model_sparsity": 0.890763982426101, "compression_loss": 105.60871887207031, "distillation_loss": 4.3512468338012695, "epoch": 4.76, "learning_rate": 2.9106790645252186e-05, "loss": 109.9053, "step": 5632, "task_loss": 2.5089259147644043 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9994888643779828, "compression/movement_sparsity/importance_threshold": -3.5798583534552356e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9223592762201953, "compression/movement_sparsity/model_sparsity": 0.8906733975730053, "compression_loss": 105.60857391357422, "distillation_loss": 3.995251178741455, "epoch": 4.76, "learning_rate": 2.9102094486709876e-05, "loss": 109.3422, "step": 5633, "task_loss": 1.972611665725708 }, { "compression/movement_sparsity/importance_regularization_factor": 0.999494244708558, "compression/movement_sparsity/importance_threshold": -3.542175944865056e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9224251810947187, "compression/movement_sparsity/model_sparsity": 0.8907370384123418, "compression_loss": 105.60847473144531, "distillation_loss": 3.2171730995178223, "epoch": 4.76, "learning_rate": 2.909739832816756e-05, "loss": 109.2319, "step": 5634, "task_loss": 2.8067142963409424 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9994995871496383, "compression/movement_sparsity/importance_threshold": -3.5047589042489988e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9224059474123221, "compression/movement_sparsity/model_sparsity": 0.8907184654661051, "compression_loss": 105.60838317871094, "distillation_loss": 3.8062636852264404, "epoch": 4.76, "learning_rate": 2.9092702169625248e-05, "loss": 109.7905, "step": 5635, "task_loss": 2.176510810852051 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9995048918351089, "compression/movement_sparsity/importance_threshold": -3.467606293910963e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9225063369796485, "compression/movement_sparsity/model_sparsity": 0.8908154063429594, "compression_loss": 105.60826110839844, "distillation_loss": 4.2388176918029785, "epoch": 4.76, "learning_rate": 2.9088006011082935e-05, "loss": 109.705, "step": 5636, "task_loss": 1.9871796369552612 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9995101588988551, "compression/movement_sparsity/importance_threshold": -3.430717176155715e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9225204194216263, "compression/movement_sparsity/model_sparsity": 0.8908290050097327, "compression_loss": 105.60816955566406, "distillation_loss": 4.142559051513672, "epoch": 4.76, "learning_rate": 2.9083309852540624e-05, "loss": 109.5132, "step": 5637, "task_loss": 2.2919771671295166 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9995153884747618, "compression/movement_sparsity/importance_threshold": -3.394090613285418e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.922522363060951, "compression/movement_sparsity/model_sparsity": 0.8908308818790672, "compression_loss": 105.60810852050781, "distillation_loss": 2.455982208251953, "epoch": 4.77, "learning_rate": 2.9078613693998314e-05, "loss": 108.8454, "step": 5638, "task_loss": 1.6038649082183838 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9995205806967143, "compression/movement_sparsity/importance_threshold": -3.3577256676057057e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9225714071624374, "compression/movement_sparsity/model_sparsity": 0.8908782411647902, "compression_loss": 105.60797119140625, "distillation_loss": 4.4005022048950195, "epoch": 4.77, "learning_rate": 2.9073917535455997e-05, "loss": 109.7628, "step": 5639, "task_loss": 2.1704659461975098 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9995257356985978, "compression/movement_sparsity/importance_threshold": -3.3216214014196097e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9225260237804153, "compression/movement_sparsity/model_sparsity": 0.8908344168415562, "compression_loss": 105.60785675048828, "distillation_loss": 3.815706491470337, "epoch": 4.77, "learning_rate": 2.9069221376913687e-05, "loss": 109.8515, "step": 5640, "task_loss": 3.159796953201294 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9995308536142973, "compression/movement_sparsity/importance_threshold": -3.2857768770310283e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9225654093061164, "compression/movement_sparsity/model_sparsity": 0.8908724493532855, "compression_loss": 105.6076889038086, "distillation_loss": 5.378450870513916, "epoch": 4.77, "learning_rate": 2.9064525218371373e-05, "loss": 110.1261, "step": 5641, "task_loss": 2.8019936084747314 }, { "compression/movement_sparsity/importance_regularization_factor": 0.999535934577698, "compression/movement_sparsity/importance_threshold": -3.250191156744728e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9226058680069049, "compression/movement_sparsity/model_sparsity": 0.8909115181732364, "compression_loss": 105.60759735107422, "distillation_loss": 2.6440932750701904, "epoch": 4.77, "learning_rate": 2.9059829059829063e-05, "loss": 108.9915, "step": 5642, "task_loss": 0.9439173340797424 }, { "compression/movement_sparsity/importance_regularization_factor": 0.999540978722685, "compression/movement_sparsity/importance_threshold": -3.2148633028646068e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9226422844148648, "compression/movement_sparsity/model_sparsity": 0.890946683565553, "compression_loss": 105.60747528076172, "distillation_loss": 3.772413730621338, "epoch": 4.77, "learning_rate": 2.9055132901286746e-05, "loss": 110.0079, "step": 5643, "task_loss": 2.9333770275115967 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9995459861831435, "compression/movement_sparsity/importance_threshold": -3.1797923776936965e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.922691793558889, "compression/movement_sparsity/model_sparsity": 0.8909944919181718, "compression_loss": 105.60733032226562, "distillation_loss": 3.9946603775024414, "epoch": 4.77, "learning_rate": 2.9050436742744435e-05, "loss": 109.1117, "step": 5644, "task_loss": 3.417046070098877 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9995509570929586, "compression/movement_sparsity/importance_threshold": -3.1449774435358957e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.922716810462589, "compression/movement_sparsity/model_sparsity": 0.8910186494142688, "compression_loss": 105.60714721679688, "distillation_loss": 3.8972456455230713, "epoch": 4.77, "learning_rate": 2.9045740584202125e-05, "loss": 109.5048, "step": 5645, "task_loss": 2.5593321323394775 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9995558915860154, "compression/movement_sparsity/importance_threshold": -3.1104175626968378e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9227141990698767, "compression/movement_sparsity/model_sparsity": 0.8910161277309298, "compression_loss": 105.60697174072266, "distillation_loss": 3.189892053604126, "epoch": 4.77, "learning_rate": 2.9041044425659815e-05, "loss": 109.4921, "step": 5646, "task_loss": 1.401733636856079 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9995607897961991, "compression/movement_sparsity/importance_threshold": -3.076111797479554e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9227340408848228, "compression/movement_sparsity/model_sparsity": 0.891035287918492, "compression_loss": 105.60681915283203, "distillation_loss": 5.323644638061523, "epoch": 4.77, "learning_rate": 2.9036348267117498e-05, "loss": 109.8583, "step": 5647, "task_loss": 2.8699374198913574 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9995656518573949, "compression/movement_sparsity/importance_threshold": -3.042059210187943e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9227534772780693, "compression/movement_sparsity/model_sparsity": 0.891054056611837, "compression_loss": 105.60670471191406, "distillation_loss": 3.7010111808776855, "epoch": 4.77, "learning_rate": 2.9031652108575184e-05, "loss": 109.1357, "step": 5648, "task_loss": 2.7158405780792236 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9995704779034879, "compression/movement_sparsity/importance_threshold": -3.008258863125904e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9228030937396021, "compression/movement_sparsity/model_sparsity": 0.8911019685952781, "compression_loss": 105.6065673828125, "distillation_loss": 4.858945846557617, "epoch": 4.77, "learning_rate": 2.9026955950032874e-05, "loss": 110.0395, "step": 5649, "task_loss": 2.619891881942749 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9995752680683632, "compression/movement_sparsity/importance_threshold": -2.9747098185973347e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9228590061616467, "compression/movement_sparsity/model_sparsity": 0.8911559602536187, "compression_loss": 105.60639190673828, "distillation_loss": 3.7543773651123047, "epoch": 4.78, "learning_rate": 2.9022259791490564e-05, "loss": 108.954, "step": 5650, "task_loss": 1.553162932395935 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9995800224859058, "compression/movement_sparsity/importance_threshold": -2.941411138907002e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9228392239675388, "compression/movement_sparsity/model_sparsity": 0.8911368576387355, "compression_loss": 105.60621643066406, "distillation_loss": 3.602078914642334, "epoch": 4.78, "learning_rate": 2.9017563632948247e-05, "loss": 109.0845, "step": 5651, "task_loss": 1.0530041456222534 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9995847412900012, "compression/movement_sparsity/importance_threshold": -2.9083618863570693e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9228570267498192, "compression/movement_sparsity/model_sparsity": 0.8911540488406768, "compression_loss": 105.60604095458984, "distillation_loss": 5.1785173416137695, "epoch": 4.78, "learning_rate": 2.9012867474405936e-05, "loss": 109.4983, "step": 5652, "task_loss": 2.988839626312256 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9995894246145341, "compression/movement_sparsity/importance_threshold": -2.8755611232540376e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9228679731357089, "compression/movement_sparsity/model_sparsity": 0.8911646191845362, "compression_loss": 105.60591125488281, "distillation_loss": 3.4887447357177734, "epoch": 4.78, "learning_rate": 2.9008171315863626e-05, "loss": 109.1467, "step": 5653, "task_loss": 2.938291549682617 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9995940725933901, "compression/movement_sparsity/importance_threshold": -2.843007911900071e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9228560847405759, "compression/movement_sparsity/model_sparsity": 0.8911531391923491, "compression_loss": 105.6057357788086, "distillation_loss": 4.141871452331543, "epoch": 4.78, "learning_rate": 2.9003475157321312e-05, "loss": 109.6863, "step": 5654, "task_loss": 1.7518213987350464 }, { "compression/movement_sparsity/importance_regularization_factor": 0.999598685360454, "compression/movement_sparsity/importance_threshold": -2.810701314599935e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9229157890479287, "compression/movement_sparsity/model_sparsity": 0.8912107924730723, "compression_loss": 105.60563659667969, "distillation_loss": 5.261333465576172, "epoch": 4.78, "learning_rate": 2.8998778998779002e-05, "loss": 110.303, "step": 5655, "task_loss": 2.5550904273986816 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9996032630496111, "compression/movement_sparsity/importance_threshold": -2.7786403936566617e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9229129153235284, "compression/movement_sparsity/model_sparsity": 0.8912080174699458, "compression_loss": 105.60546875, "distillation_loss": 4.363441467285156, "epoch": 4.78, "learning_rate": 2.8994082840236685e-05, "loss": 109.4663, "step": 5656, "task_loss": 2.0265538692474365 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9996078057947465, "compression/movement_sparsity/importance_threshold": -2.746824211375884e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.922955663464503, "compression/movement_sparsity/model_sparsity": 0.8912492970807693, "compression_loss": 105.6053237915039, "distillation_loss": 3.6328392028808594, "epoch": 4.78, "learning_rate": 2.8989386681694375e-05, "loss": 109.3796, "step": 5657, "task_loss": 2.4226694107055664 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9996123137297453, "compression/movement_sparsity/importance_threshold": -2.715251830059766e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9229369305971471, "compression/movement_sparsity/model_sparsity": 0.8912312077450361, "compression_loss": 105.60517883300781, "distillation_loss": 3.928009033203125, "epoch": 4.78, "learning_rate": 2.8984690523152065e-05, "loss": 109.6372, "step": 5658, "task_loss": 2.5767695903778076 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9996167869884928, "compression/movement_sparsity/importance_threshold": -2.6839223120139408e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9229736570334655, "compression/movement_sparsity/model_sparsity": 0.8912666725152832, "compression_loss": 105.60507202148438, "distillation_loss": 4.8014140129089355, "epoch": 4.78, "learning_rate": 2.8979994364609754e-05, "loss": 109.2881, "step": 5659, "task_loss": 2.319236993789673 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9996212257048739, "compression/movement_sparsity/importance_threshold": -2.6528347195405727e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9230042067509486, "compression/movement_sparsity/model_sparsity": 0.8912961727559888, "compression_loss": 105.60497283935547, "distillation_loss": 3.252730131149292, "epoch": 4.78, "learning_rate": 2.8975298206067437e-05, "loss": 108.8667, "step": 5660, "task_loss": 1.0513325929641724 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9996256300127739, "compression/movement_sparsity/importance_threshold": -2.621988114945295e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9230470383610966, "compression/movement_sparsity/model_sparsity": 0.8913375329685628, "compression_loss": 105.60482788085938, "distillation_loss": 3.057798385620117, "epoch": 4.78, "learning_rate": 2.8970602047525124e-05, "loss": 109.0069, "step": 5661, "task_loss": 1.599597454071045 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9996300000460779, "compression/movement_sparsity/importance_threshold": -2.591381560531139e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9230470979819347, "compression/movement_sparsity/model_sparsity": 0.8913375905412418, "compression_loss": 105.60467529296875, "distillation_loss": 4.240011215209961, "epoch": 4.79, "learning_rate": 2.8965905888982813e-05, "loss": 109.9488, "step": 5662, "task_loss": 1.789961814880371 }, { "compression/movement_sparsity/importance_regularization_factor": 0.999634335938671, "compression/movement_sparsity/importance_threshold": -2.561014118602871e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9230878905594171, "compression/movement_sparsity/model_sparsity": 0.8913769817681949, "compression_loss": 105.60453796386719, "distillation_loss": 4.366610050201416, "epoch": 4.79, "learning_rate": 2.8961209730440503e-05, "loss": 109.4668, "step": 5663, "task_loss": 1.924889087677002 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9996386378244385, "compression/movement_sparsity/importance_threshold": -2.530884851463522e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9230724010656581, "compression/movement_sparsity/model_sparsity": 0.8913620243861978, "compression_loss": 105.60440826416016, "distillation_loss": 4.690059661865234, "epoch": 4.79, "learning_rate": 2.8956513571898186e-05, "loss": 110.0697, "step": 5664, "task_loss": 2.761455774307251 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9996429058372653, "compression/movement_sparsity/importance_threshold": -2.500992821417858e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9230787327986727, "compression/movement_sparsity/model_sparsity": 0.8913681386047048, "compression_loss": 105.60428619384766, "distillation_loss": 2.7741312980651855, "epoch": 4.79, "learning_rate": 2.8951817413355876e-05, "loss": 109.5664, "step": 5665, "task_loss": 1.9377176761627197 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9996471401110367, "compression/movement_sparsity/importance_threshold": -2.4713370907689103e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9230731642123868, "compression/movement_sparsity/model_sparsity": 0.8913627613164886, "compression_loss": 105.60414123535156, "distillation_loss": 4.983858585357666, "epoch": 4.79, "learning_rate": 2.8947121254813565e-05, "loss": 109.6826, "step": 5666, "task_loss": 2.3862502574920654 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9996513407796378, "compression/movement_sparsity/importance_threshold": -2.4419167218223126e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9231099741178787, "compression/movement_sparsity/model_sparsity": 0.8913983066884864, "compression_loss": 105.60397338867188, "distillation_loss": 3.4344563484191895, "epoch": 4.79, "learning_rate": 2.8942425096271252e-05, "loss": 109.6671, "step": 5667, "task_loss": 1.5471762418746948 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9996555079769538, "compression/movement_sparsity/importance_threshold": -2.4127307768802286e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.923103546991523, "compression/movement_sparsity/model_sparsity": 0.8913921003536931, "compression_loss": 105.60382843017578, "distillation_loss": 5.558966636657715, "epoch": 4.79, "learning_rate": 2.893772893772894e-05, "loss": 109.8489, "step": 5668, "task_loss": 2.1992363929748535 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9996596418368697, "compression/movement_sparsity/importance_threshold": -2.3837783182474245e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9231275861134769, "compression/movement_sparsity/model_sparsity": 0.891415313657855, "compression_loss": 105.60367584228516, "distillation_loss": 4.293231010437012, "epoch": 4.79, "learning_rate": 2.8933032779186624e-05, "loss": 109.7502, "step": 5669, "task_loss": 2.4795405864715576 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9996637424932708, "compression/movement_sparsity/importance_threshold": -2.355058408227799e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.923124175801533, "compression/movement_sparsity/model_sparsity": 0.8914120205006177, "compression_loss": 105.60350799560547, "distillation_loss": 4.44014310836792, "epoch": 4.79, "learning_rate": 2.8928336620644314e-05, "loss": 109.7427, "step": 5670, "task_loss": 2.3470048904418945 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9996678100800421, "compression/movement_sparsity/importance_threshold": -2.326570109126118e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9230924694397893, "compression/movement_sparsity/model_sparsity": 0.89138140334994, "compression_loss": 105.60332489013672, "distillation_loss": 4.180745601654053, "epoch": 4.79, "learning_rate": 2.8923640462102004e-05, "loss": 109.6855, "step": 5671, "task_loss": 3.089413642883301 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9996718447310688, "compression/movement_sparsity/importance_threshold": -2.2983124832454127e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9231926205237628, "compression/movement_sparsity/model_sparsity": 0.8914781139360783, "compression_loss": 105.60311126708984, "distillation_loss": 2.157921075820923, "epoch": 4.79, "learning_rate": 2.891894430355969e-05, "loss": 109.0114, "step": 5672, "task_loss": 2.1778717041015625 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9996758465802362, "compression/movement_sparsity/importance_threshold": -2.2702845928887147e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9232463985198006, "compression/movement_sparsity/model_sparsity": 0.8915300444925117, "compression_loss": 105.6029052734375, "distillation_loss": 3.26692533493042, "epoch": 4.79, "learning_rate": 2.8914248145017377e-05, "loss": 109.3564, "step": 5673, "task_loss": 2.509411334991455 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9996798157614292, "compression/movement_sparsity/importance_threshold": -2.2424855003625246e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9232135355137961, "compression/movement_sparsity/model_sparsity": 0.891498310431862, "compression_loss": 105.6026840209961, "distillation_loss": 4.3126020431518555, "epoch": 4.8, "learning_rate": 2.8909551986475063e-05, "loss": 109.4038, "step": 5674, "task_loss": 1.9650356769561768 }, { "compression/movement_sparsity/importance_regularization_factor": 0.999683752408533, "compression/movement_sparsity/importance_threshold": -2.2149142679690065e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9231845717106086, "compression/movement_sparsity/model_sparsity": 0.8914703416244171, "compression_loss": 105.6025161743164, "distillation_loss": 4.245550632476807, "epoch": 4.8, "learning_rate": 2.8904855827932753e-05, "loss": 109.6315, "step": 5675, "task_loss": 3.0455315113067627 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9996876566554328, "compression/movement_sparsity/importance_threshold": -2.1875699580129263e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9232408657060176, "compression/movement_sparsity/model_sparsity": 0.8915247017479031, "compression_loss": 105.60231018066406, "distillation_loss": 3.9444079399108887, "epoch": 4.8, "learning_rate": 2.8900159669390442e-05, "loss": 110.2266, "step": 5676, "task_loss": 1.301820993423462 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9996915286360136, "compression/movement_sparsity/importance_threshold": -2.1604516327981826e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9232746707212652, "compression/movement_sparsity/model_sparsity": 0.8915573454568806, "compression_loss": 105.60211181640625, "distillation_loss": 4.091458797454834, "epoch": 4.8, "learning_rate": 2.8895463510848125e-05, "loss": 109.4986, "step": 5677, "task_loss": 2.6463732719421387 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9996953684841609, "compression/movement_sparsity/importance_threshold": -2.133558354627807e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9232628061744675, "compression/movement_sparsity/model_sparsity": 0.891545888493765, "compression_loss": 105.60187530517578, "distillation_loss": 3.783182144165039, "epoch": 4.8, "learning_rate": 2.8890767352305815e-05, "loss": 109.7105, "step": 5678, "task_loss": 2.8817298412323 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9996991763337594, "compression/movement_sparsity/importance_threshold": -2.1068891858065653e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9232313382960765, "compression/movement_sparsity/model_sparsity": 0.8915155016338032, "compression_loss": 105.60169982910156, "distillation_loss": 3.3853981494903564, "epoch": 4.8, "learning_rate": 2.8886071193763505e-05, "loss": 108.9397, "step": 5679, "task_loss": 1.9737993478775024 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9997029523186944, "compression/movement_sparsity/importance_threshold": -2.080443188638356e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9231854302506785, "compression/movement_sparsity/model_sparsity": 0.8914711706709942, "compression_loss": 105.60153198242188, "distillation_loss": 4.332512855529785, "epoch": 4.8, "learning_rate": 2.888137503522119e-05, "loss": 109.5464, "step": 5680, "task_loss": 2.8926968574523926 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9997066965728512, "compression/movement_sparsity/importance_threshold": -2.054219425427946e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9231700480744282, "compression/movement_sparsity/model_sparsity": 0.8914563169198193, "compression_loss": 105.60138702392578, "distillation_loss": 3.275078773498535, "epoch": 4.8, "learning_rate": 2.8876678876678874e-05, "loss": 109.4918, "step": 5681, "task_loss": 2.2673392295837402 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9997104092301147, "compression/movement_sparsity/importance_threshold": -2.0282169584783655e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9231723852112848, "compression/movement_sparsity/model_sparsity": 0.8914585737688351, "compression_loss": 105.60120391845703, "distillation_loss": 5.186788082122803, "epoch": 4.8, "learning_rate": 2.8871982718136564e-05, "loss": 109.7595, "step": 5682, "task_loss": 2.766563653945923 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9997140904243702, "compression/movement_sparsity/importance_threshold": -2.0024348500926464e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9231985587592455, "compression/movement_sparsity/model_sparsity": 0.891483848174904, "compression_loss": 105.6009750366211, "distillation_loss": 3.2706546783447266, "epoch": 4.8, "learning_rate": 2.8867286559594254e-05, "loss": 109.1863, "step": 5683, "task_loss": 1.5364062786102295 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9997177402895029, "compression/movement_sparsity/importance_threshold": -1.976872162576422e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9232445621979847, "compression/movement_sparsity/model_sparsity": 0.8915282712539994, "compression_loss": 105.60076141357422, "distillation_loss": 5.436102867126465, "epoch": 4.8, "learning_rate": 2.8862590401051943e-05, "loss": 109.3955, "step": 5684, "task_loss": 2.8402621746063232 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9997213589593976, "compression/movement_sparsity/importance_threshold": -1.951527958233591e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.923241473838567, "compression/movement_sparsity/model_sparsity": 0.8915252889892286, "compression_loss": 105.6005630493164, "distillation_loss": 4.29775857925415, "epoch": 4.81, "learning_rate": 2.885789424250963e-05, "loss": 109.7468, "step": 5685, "task_loss": 2.6675591468811035 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9997249465679399, "compression/movement_sparsity/importance_threshold": -1.926401299367185e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9233000095774915, "compression/movement_sparsity/model_sparsity": 0.891581813845444, "compression_loss": 105.60035705566406, "distillation_loss": 4.684015274047852, "epoch": 4.81, "learning_rate": 2.8853198083967316e-05, "loss": 109.2161, "step": 5686, "task_loss": 2.2515711784362793 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9997285032490146, "compression/movement_sparsity/importance_threshold": -1.9014912482819696e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9233031217852444, "compression/movement_sparsity/model_sparsity": 0.8915848191392863, "compression_loss": 105.60009765625, "distillation_loss": 4.124024868011475, "epoch": 4.81, "learning_rate": 2.8848501925425002e-05, "loss": 109.3567, "step": 5687, "task_loss": 2.2457985877990723 }, { "compression/movement_sparsity/importance_regularization_factor": 0.999732029136507, "compression/movement_sparsity/importance_threshold": -1.8767968672818436e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9233214969275713, "compression/movement_sparsity/model_sparsity": 0.8916025630389458, "compression_loss": 105.59989929199219, "distillation_loss": 5.2724103927612305, "epoch": 4.81, "learning_rate": 2.8843805766882692e-05, "loss": 109.9345, "step": 5688, "task_loss": 2.300959348678589 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9997355243643022, "compression/movement_sparsity/importance_threshold": -1.8523172186698383e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9233385365631229, "compression/movement_sparsity/model_sparsity": 0.8916190173105961, "compression_loss": 105.5996322631836, "distillation_loss": 4.746391296386719, "epoch": 4.81, "learning_rate": 2.8839109608340382e-05, "loss": 109.7073, "step": 5689, "task_loss": 4.34242582321167 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9997389890662853, "compression/movement_sparsity/importance_threshold": -1.8280513647515872e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9233359251704106, "compression/movement_sparsity/model_sparsity": 0.8916164956272572, "compression_loss": 105.59937286376953, "distillation_loss": 2.142906427383423, "epoch": 4.81, "learning_rate": 2.8834413449798065e-05, "loss": 108.9779, "step": 5690, "task_loss": 1.1718320846557617 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9997424233763416, "compression/movement_sparsity/importance_threshold": -1.8039983678292543e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9233293549540433, "compression/movement_sparsity/model_sparsity": 0.8916101511180343, "compression_loss": 105.59909057617188, "distillation_loss": 2.425784111022949, "epoch": 4.81, "learning_rate": 2.8829717291255754e-05, "loss": 108.8178, "step": 5691, "task_loss": 1.6886776685714722 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9997458274283559, "compression/movement_sparsity/importance_threshold": -1.7801572902084728e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9233323002234494, "compression/movement_sparsity/model_sparsity": 0.8916129952083757, "compression_loss": 105.59879302978516, "distillation_loss": 4.4191155433654785, "epoch": 4.81, "learning_rate": 2.882502113271344e-05, "loss": 110.2999, "step": 5692, "task_loss": 3.944244861602783 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9997492013562138, "compression/movement_sparsity/importance_threshold": -1.7565271941914068e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9233687404797446, "compression/movement_sparsity/model_sparsity": 0.8916481836297637, "compression_loss": 105.59849548339844, "distillation_loss": 4.312252044677734, "epoch": 4.81, "learning_rate": 2.882032497417113e-05, "loss": 109.5661, "step": 5693, "task_loss": 2.179814100265503 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9997525452938001, "compression/movement_sparsity/importance_threshold": -1.7331071420836897e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.92338742565043, "compression/movement_sparsity/model_sparsity": 0.8916662269073538, "compression_loss": 105.59819793701172, "distillation_loss": 3.828185796737671, "epoch": 4.81, "learning_rate": 2.8815628815628813e-05, "loss": 109.5124, "step": 5694, "task_loss": 2.3063619136810303 }, { "compression/movement_sparsity/importance_regularization_factor": 0.999755859375, "compression/movement_sparsity/importance_threshold": -1.70989619618922e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9233983124154815, "compression/movement_sparsity/model_sparsity": 0.8916767396785342, "compression_loss": 105.59794616699219, "distillation_loss": 4.6303181648254395, "epoch": 4.81, "learning_rate": 2.8810932657086503e-05, "loss": 109.4474, "step": 5695, "task_loss": 2.97469162940979 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9997591437336987, "compression/movement_sparsity/importance_threshold": -1.6868934188110293e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9233784348280325, "compression/movement_sparsity/model_sparsity": 0.8916575449473647, "compression_loss": 105.5976333618164, "distillation_loss": 3.579273223876953, "epoch": 4.81, "learning_rate": 2.8806236498544193e-05, "loss": 108.9941, "step": 5696, "task_loss": 1.6963462829589844 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9997623985037813, "compression/movement_sparsity/importance_threshold": -1.6640978722538835e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9233599762165322, "compression/movement_sparsity/model_sparsity": 0.8916397204459547, "compression_loss": 105.5972900390625, "distillation_loss": 3.6290738582611084, "epoch": 4.82, "learning_rate": 2.8801540340001883e-05, "loss": 109.1993, "step": 5697, "task_loss": 2.6862573623657227 }, { "compression/movement_sparsity/importance_regularization_factor": 0.999765623819133, "compression/movement_sparsity/importance_threshold": -1.6415086188208139e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9233822982583465, "compression/movement_sparsity/model_sparsity": 0.8916612756569622, "compression_loss": 105.59703826904297, "distillation_loss": 3.9492249488830566, "epoch": 4.82, "learning_rate": 2.879684418145957e-05, "loss": 109.9019, "step": 5698, "task_loss": 2.2623655796051025 }, { "compression/movement_sparsity/importance_regularization_factor": 0.999768819813639, "compression/movement_sparsity/importance_threshold": -1.6191247208165865e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.92339423435015, "compression/movement_sparsity/model_sparsity": 0.8916728017072925, "compression_loss": 105.59677124023438, "distillation_loss": 4.307875633239746, "epoch": 4.82, "learning_rate": 2.8792148022917252e-05, "loss": 109.6528, "step": 5699, "task_loss": 2.394026517868042 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9997719866211843, "compression/movement_sparsity/importance_threshold": -1.5969452405442328e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9233531675168121, "compression/movement_sparsity/model_sparsity": 0.891633145646016, "compression_loss": 105.59647369384766, "distillation_loss": 3.448768138885498, "epoch": 4.82, "learning_rate": 2.878745186437494e-05, "loss": 109.0861, "step": 5700, "task_loss": 1.5466502904891968 }, { "compression/movement_sparsity/importance_regularization_factor": 0.999775124375654, "compression/movement_sparsity/importance_threshold": -1.574969240309386e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9233958560369485, "compression/movement_sparsity/model_sparsity": 0.8916743676841605, "compression_loss": 105.59616088867188, "distillation_loss": 5.1056108474731445, "epoch": 4.82, "learning_rate": 2.878275570583263e-05, "loss": 109.8351, "step": 5701, "task_loss": 1.883887529373169 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9997782332109333, "compression/movement_sparsity/importance_threshold": -1.5531957824150774e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9234229358216496, "compression/movement_sparsity/model_sparsity": 0.8917005171949499, "compression_loss": 105.5959243774414, "distillation_loss": 4.011333465576172, "epoch": 4.82, "learning_rate": 2.877805954729032e-05, "loss": 109.2304, "step": 5702, "task_loss": 1.9362305402755737 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9997813132609075, "compression/movement_sparsity/importance_threshold": -1.531623929165206e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9233343988769532, "compression/movement_sparsity/model_sparsity": 0.8916150217666755, "compression_loss": 105.5956039428711, "distillation_loss": 2.5746169090270996, "epoch": 4.82, "learning_rate": 2.8773363388748004e-05, "loss": 108.899, "step": 5703, "task_loss": 2.109720468521118 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9997843646594615, "compression/movement_sparsity/importance_threshold": -1.51025274286367e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9233859351294754, "compression/movement_sparsity/model_sparsity": 0.8916647875903795, "compression_loss": 105.59537506103516, "distillation_loss": 4.674323558807373, "epoch": 4.82, "learning_rate": 2.8768667230205694e-05, "loss": 109.8116, "step": 5704, "task_loss": 2.632923126220703 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9997873875404806, "compression/movement_sparsity/importance_threshold": -1.4890812858152358e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9233510330908054, "compression/movement_sparsity/model_sparsity": 0.8916310845441089, "compression_loss": 105.59503173828125, "distillation_loss": 3.91969633102417, "epoch": 4.82, "learning_rate": 2.876397107166338e-05, "loss": 109.6389, "step": 5705, "task_loss": 2.8761422634124756 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9997903820378499, "compression/movement_sparsity/importance_threshold": -1.4681086203229346e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9233411479558352, "compression/movement_sparsity/model_sparsity": 0.8916215389939351, "compression_loss": 105.5947265625, "distillation_loss": 3.846813201904297, "epoch": 4.82, "learning_rate": 2.875927491312107e-05, "loss": 109.6477, "step": 5706, "task_loss": 2.1879689693450928 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9997933482854545, "compression/movement_sparsity/importance_threshold": -1.447333808690665e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9233681919680333, "compression/movement_sparsity/model_sparsity": 0.8916476539611172, "compression_loss": 105.59439086914062, "distillation_loss": 4.152050018310547, "epoch": 4.82, "learning_rate": 2.8754578754578753e-05, "loss": 109.2326, "step": 5707, "task_loss": 2.0574676990509033 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9997962864171795, "compression/movement_sparsity/importance_threshold": -1.4267559132231933e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9234565619743826, "compression/movement_sparsity/model_sparsity": 0.8917329881858905, "compression_loss": 105.59405517578125, "distillation_loss": 4.01668643951416, "epoch": 4.82, "learning_rate": 2.8749882596036443e-05, "loss": 109.5796, "step": 5708, "task_loss": 2.178811550140381 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9997991965669102, "compression/movement_sparsity/importance_threshold": -1.4063739962244179e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9234674964361048, "compression/movement_sparsity/model_sparsity": 0.8917435470152141, "compression_loss": 105.59379577636719, "distillation_loss": 3.8174896240234375, "epoch": 4.83, "learning_rate": 2.8745186437494132e-05, "loss": 109.4665, "step": 5709, "task_loss": 1.850546956062317 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9998020788685317, "compression/movement_sparsity/importance_threshold": -1.3861871199973702e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.923479539845417, "compression/movement_sparsity/model_sparsity": 0.8917551766963666, "compression_loss": 105.59341430664062, "distillation_loss": 3.6967933177948, "epoch": 4.83, "learning_rate": 2.8740490278951822e-05, "loss": 109.4936, "step": 5710, "task_loss": 2.026614189147949 }, { "compression/movement_sparsity/importance_regularization_factor": 0.999804933455929, "compression/movement_sparsity/importance_threshold": -1.3661943468468163e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9234189531496592, "compression/movement_sparsity/model_sparsity": 0.8916966713399945, "compression_loss": 105.5931396484375, "distillation_loss": 4.380904674530029, "epoch": 4.83, "learning_rate": 2.8735794120409505e-05, "loss": 110.4106, "step": 5711, "task_loss": 2.534013032913208 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9998077604629874, "compression/movement_sparsity/importance_threshold": -1.3463947390775222e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9234079948396018, "compression/movement_sparsity/model_sparsity": 0.8916860894815993, "compression_loss": 105.5927963256836, "distillation_loss": 4.466061592102051, "epoch": 4.83, "learning_rate": 2.873109796186719e-05, "loss": 109.6774, "step": 5712, "task_loss": 2.956721067428589 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9998105600235919, "compression/movement_sparsity/importance_threshold": -1.3267873589916518e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9234230312149907, "compression/movement_sparsity/model_sparsity": 0.8917006093112363, "compression_loss": 105.59247589111328, "distillation_loss": 3.1775381565093994, "epoch": 4.83, "learning_rate": 2.872640180332488e-05, "loss": 109.0873, "step": 5713, "task_loss": 2.629027843475342 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9998133322716277, "compression/movement_sparsity/importance_threshold": -1.3073712688948386e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9234189769979945, "compression/movement_sparsity/model_sparsity": 0.8916966943690662, "compression_loss": 105.59213256835938, "distillation_loss": 4.9218292236328125, "epoch": 4.83, "learning_rate": 2.872170564478257e-05, "loss": 109.573, "step": 5714, "task_loss": 3.185810089111328 }, { "compression/movement_sparsity/importance_regularization_factor": 0.99981607734098, "compression/movement_sparsity/importance_threshold": -1.2881455310901138e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9234805176271631, "compression/movement_sparsity/model_sparsity": 0.8917561208883018, "compression_loss": 105.59181213378906, "distillation_loss": 4.257322311401367, "epoch": 4.83, "learning_rate": 2.871700948624026e-05, "loss": 109.1893, "step": 5715, "task_loss": 2.3104312419891357 }, { "compression/movement_sparsity/importance_regularization_factor": 0.999818795365534, "compression/movement_sparsity/importance_threshold": -1.2691092078813762e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9234896396154045, "compression/movement_sparsity/model_sparsity": 0.8917649295081845, "compression_loss": 105.59149932861328, "distillation_loss": 4.190281867980957, "epoch": 4.83, "learning_rate": 2.8712313327697943e-05, "loss": 109.5848, "step": 5716, "task_loss": 2.6030220985412598 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9998214864791746, "compression/movement_sparsity/importance_threshold": -1.2502613615725244e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.923462512134033, "compression/movement_sparsity/model_sparsity": 0.891738733939252, "compression_loss": 105.59109497070312, "distillation_loss": 3.924710273742676, "epoch": 4.83, "learning_rate": 2.8707617169155633e-05, "loss": 110.1064, "step": 5717, "task_loss": 2.252213716506958 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9998241508157871, "compression/movement_sparsity/importance_threshold": -1.2316010544683245e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9234779420069538, "compression/movement_sparsity/model_sparsity": 0.8917536337485701, "compression_loss": 105.5907974243164, "distillation_loss": 4.670319557189941, "epoch": 4.83, "learning_rate": 2.870292101061332e-05, "loss": 109.066, "step": 5718, "task_loss": 2.2773499488830566 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9998267885092567, "compression/movement_sparsity/importance_threshold": -1.2131273488726751e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9235022315364281, "compression/movement_sparsity/model_sparsity": 0.8917770888579836, "compression_loss": 105.59042358398438, "distillation_loss": 4.5051374435424805, "epoch": 4.83, "learning_rate": 2.869822485207101e-05, "loss": 108.9894, "step": 5719, "task_loss": 2.8995678424835205 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9998293996934683, "compression/movement_sparsity/importance_threshold": -1.1948393070886076e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9235205709062521, "compression/movement_sparsity/model_sparsity": 0.8917947982140356, "compression_loss": 105.59002685546875, "distillation_loss": 4.603326320648193, "epoch": 4.83, "learning_rate": 2.8693528693528692e-05, "loss": 110.2865, "step": 5720, "task_loss": 2.6093969345092773 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9998319845023073, "compression/movement_sparsity/importance_threshold": -1.1767359914217554e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9235696388560736, "compression/movement_sparsity/model_sparsity": 0.8918421805288301, "compression_loss": 105.5896987915039, "distillation_loss": 2.309217929840088, "epoch": 4.84, "learning_rate": 2.8688832534986382e-05, "loss": 108.5457, "step": 5721, "task_loss": 1.401188850402832 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9998345430696587, "compression/movement_sparsity/importance_threshold": -1.1588164641742824e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9235923305470847, "compression/movement_sparsity/model_sparsity": 0.8918640926904472, "compression_loss": 105.58939361572266, "distillation_loss": 3.5453619956970215, "epoch": 4.84, "learning_rate": 2.868413637644407e-05, "loss": 109.3182, "step": 5722, "task_loss": 1.51968514919281 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9998370755294077, "compression/movement_sparsity/importance_threshold": -1.1410797876509546e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9236255035814476, "compression/movement_sparsity/model_sparsity": 0.8918961261290276, "compression_loss": 105.58903503417969, "distillation_loss": 4.044450759887695, "epoch": 4.84, "learning_rate": 2.8679440217901758e-05, "loss": 108.8695, "step": 5723, "task_loss": 1.8900260925292969 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9998395820154394, "compression/movement_sparsity/importance_threshold": -1.1235250241556707e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9236886658974147, "compression/movement_sparsity/model_sparsity": 0.8919571186251313, "compression_loss": 105.58867645263672, "distillation_loss": 4.239625453948975, "epoch": 4.84, "learning_rate": 2.8674744059359444e-05, "loss": 110.0528, "step": 5724, "task_loss": 2.970317840576172 }, { "compression/movement_sparsity/importance_regularization_factor": 0.999842062661639, "compression/movement_sparsity/importance_threshold": -1.1061512359923295e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9237127407918716, "compression/movement_sparsity/model_sparsity": 0.8919803664729005, "compression_loss": 105.58840942382812, "distillation_loss": 2.983698844909668, "epoch": 4.84, "learning_rate": 2.867004790081713e-05, "loss": 108.9478, "step": 5725, "task_loss": 1.7866815328598022 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9998445176018915, "compression/movement_sparsity/importance_threshold": -1.088957485465697e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9237099624608124, "compression/movement_sparsity/model_sparsity": 0.8919776835860603, "compression_loss": 105.58805084228516, "distillation_loss": 4.215619087219238, "epoch": 4.84, "learning_rate": 2.866535174227482e-05, "loss": 109.5378, "step": 5726, "task_loss": 2.1453146934509277 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9998469469700823, "compression/movement_sparsity/importance_threshold": -1.0719428348788043e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9236950453271, "compression/movement_sparsity/model_sparsity": 0.8919632789017814, "compression_loss": 105.5876693725586, "distillation_loss": 3.587468385696411, "epoch": 4.84, "learning_rate": 2.866065558373251e-05, "loss": 109.0754, "step": 5727, "task_loss": 1.9469712972640991 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9998493509000963, "compression/movement_sparsity/importance_threshold": -1.0551063465355504e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9236696945467061, "compression/movement_sparsity/model_sparsity": 0.8919387989986821, "compression_loss": 105.58728790283203, "distillation_loss": 3.4647626876831055, "epoch": 4.84, "learning_rate": 2.8655959425190193e-05, "loss": 108.9996, "step": 5728, "task_loss": 3.018655300140381 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9998517295258187, "compression/movement_sparsity/importance_threshold": -1.038447082740701e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9236950334029324, "compression/movement_sparsity/model_sparsity": 0.8919632673872455, "compression_loss": 105.58688354492188, "distillation_loss": 3.6663095951080322, "epoch": 4.84, "learning_rate": 2.8651263266647883e-05, "loss": 109.9913, "step": 5729, "task_loss": 1.7663112878799438 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9998540829811348, "compression/movement_sparsity/importance_threshold": -1.0219641057972878e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9236831927044699, "compression/movement_sparsity/model_sparsity": 0.8919518334532016, "compression_loss": 105.5865249633789, "distillation_loss": 6.020182132720947, "epoch": 4.84, "learning_rate": 2.8646567108105572e-05, "loss": 109.551, "step": 5730, "task_loss": 3.4187960624694824 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9998564113999294, "compression/movement_sparsity/importance_threshold": -1.005656478010944e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9236237268804699, "compression/movement_sparsity/model_sparsity": 0.8918944104631942, "compression_loss": 105.58613586425781, "distillation_loss": 3.071834087371826, "epoch": 4.84, "learning_rate": 2.864187094956326e-05, "loss": 109.2762, "step": 5731, "task_loss": 2.4498469829559326 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9998587149160879, "compression/movement_sparsity/importance_threshold": -9.895232616838334e-07, "compression/movement_sparsity/linear_layer_sparsity": 0.9236344467071745, "compression/movement_sparsity/model_sparsity": 0.8919047620308734, "compression_loss": 105.58580780029297, "distillation_loss": 4.7092390060424805, "epoch": 4.84, "learning_rate": 2.863717479102095e-05, "loss": 109.4559, "step": 5732, "task_loss": 2.411214828491211 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9998609936634955, "compression/movement_sparsity/importance_threshold": -9.735635191207223e-07, "compression/movement_sparsity/linear_layer_sparsity": 0.9236509855276854, "compression/movement_sparsity/model_sparsity": 0.8919207326920205, "compression_loss": 105.58537292480469, "distillation_loss": 4.833512306213379, "epoch": 4.85, "learning_rate": 2.863247863247863e-05, "loss": 109.6277, "step": 5733, "task_loss": 3.463469982147217 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9998632477760372, "compression/movement_sparsity/importance_threshold": -9.57776312624642e-07, "compression/movement_sparsity/linear_layer_sparsity": 0.9236685498266131, "compression/movement_sparsity/model_sparsity": 0.8919376936032459, "compression_loss": 105.58502197265625, "distillation_loss": 3.6072967052459717, "epoch": 4.85, "learning_rate": 2.862778247393632e-05, "loss": 109.213, "step": 5734, "task_loss": 3.7080564498901367 }, { "compression/movement_sparsity/importance_regularization_factor": 0.999865477387598, "compression/movement_sparsity/importance_threshold": -9.42160704502093e-07, "compression/movement_sparsity/linear_layer_sparsity": 0.9236592370516895, "compression/movement_sparsity/model_sparsity": 0.8919287007507903, "compression_loss": 105.58460998535156, "distillation_loss": 4.377493858337402, "epoch": 4.85, "learning_rate": 2.862308631539401e-05, "loss": 110.095, "step": 5735, "task_loss": 3.240359306335449 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9998676826320634, "compression/movement_sparsity/importance_threshold": -9.267157570543721e-07, "compression/movement_sparsity/linear_layer_sparsity": 0.9236830376902906, "compression/movement_sparsity/model_sparsity": 0.8919516837642363, "compression_loss": 105.58422088623047, "distillation_loss": 4.202365875244141, "epoch": 4.85, "learning_rate": 2.8618390156851697e-05, "loss": 109.624, "step": 5736, "task_loss": 1.9415459632873535 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9998698636433183, "compression/movement_sparsity/importance_threshold": -9.114405325862454e-07, "compression/movement_sparsity/linear_layer_sparsity": 0.9237067190872155, "compression/movement_sparsity/model_sparsity": 0.8919745516323242, "compression_loss": 105.58385467529297, "distillation_loss": 4.08169412612915, "epoch": 4.85, "learning_rate": 2.8613693998309384e-05, "loss": 109.8512, "step": 5737, "task_loss": 2.1521048545837402 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9998720205552478, "compression/movement_sparsity/importance_threshold": -8.963340934024788e-07, "compression/movement_sparsity/linear_layer_sparsity": 0.9237517447442085, "compression/movement_sparsity/model_sparsity": 0.8920180305194844, "compression_loss": 105.58351135253906, "distillation_loss": 5.323280334472656, "epoch": 4.85, "learning_rate": 2.860899783976707e-05, "loss": 109.881, "step": 5738, "task_loss": 3.480252742767334 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9998741535017373, "compression/movement_sparsity/importance_threshold": -8.813955018061037e-07, "compression/movement_sparsity/linear_layer_sparsity": 0.9238464345594048, "compression/movement_sparsity/model_sparsity": 0.8921094674482288, "compression_loss": 105.58313751220703, "distillation_loss": 4.512420654296875, "epoch": 4.85, "learning_rate": 2.860430168122476e-05, "loss": 109.4066, "step": 5739, "task_loss": 1.8507853746414185 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9998762626166716, "compression/movement_sparsity/importance_threshold": -8.666238201018861e-07, "compression/movement_sparsity/linear_layer_sparsity": 0.9237801481115171, "compression/movement_sparsity/model_sparsity": 0.892045458143747, "compression_loss": 105.58281707763672, "distillation_loss": 2.7434792518615723, "epoch": 4.85, "learning_rate": 2.859960552268245e-05, "loss": 109.1152, "step": 5740, "task_loss": 2.4181931018829346 }, { "compression/movement_sparsity/importance_regularization_factor": 0.999878348033936, "compression/movement_sparsity/importance_threshold": -8.520181105937247e-07, "compression/movement_sparsity/linear_layer_sparsity": 0.9237454010870262, "compression/movement_sparsity/model_sparsity": 0.8920119047864417, "compression_loss": 105.5824203491211, "distillation_loss": 4.774539470672607, "epoch": 4.85, "learning_rate": 2.8594909364140132e-05, "loss": 109.4507, "step": 5741, "task_loss": 3.4477450847625732 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9998804098874158, "compression/movement_sparsity/importance_threshold": -8.375774355846508e-07, "compression/movement_sparsity/linear_layer_sparsity": 0.9237523648009256, "compression/movement_sparsity/model_sparsity": 0.8920186292753457, "compression_loss": 105.58198547363281, "distillation_loss": 4.162593841552734, "epoch": 4.85, "learning_rate": 2.8590213205597822e-05, "loss": 109.9438, "step": 5742, "task_loss": 2.7600302696228027 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9998824483109959, "compression/movement_sparsity/importance_threshold": -8.233008573794304e-07, "compression/movement_sparsity/linear_layer_sparsity": 0.9237970327328895, "compression/movement_sparsity/model_sparsity": 0.892061762726432, "compression_loss": 105.5815658569336, "distillation_loss": 5.0269036293029785, "epoch": 4.85, "learning_rate": 2.858551704705551e-05, "loss": 109.9769, "step": 5743, "task_loss": 1.986816644668579 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9998844634385615, "compression/movement_sparsity/importance_threshold": -8.091874382828296e-07, "compression/movement_sparsity/linear_layer_sparsity": 0.9238077048629236, "compression/movement_sparsity/model_sparsity": 0.8920720682359682, "compression_loss": 105.5811538696289, "distillation_loss": 4.615762710571289, "epoch": 4.85, "learning_rate": 2.8580820888513198e-05, "loss": 108.9336, "step": 5744, "task_loss": 2.7693445682525635 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9998864554039979, "compression/movement_sparsity/importance_threshold": -7.95236240596145e-07, "compression/movement_sparsity/linear_layer_sparsity": 0.9238302057672525, "compression/movement_sparsity/model_sparsity": 0.8920937961650125, "compression_loss": 105.58065032958984, "distillation_loss": 3.2815237045288086, "epoch": 4.86, "learning_rate": 2.8576124729970888e-05, "loss": 109.2264, "step": 5745, "task_loss": 2.0057530403137207 }, { "compression/movement_sparsity/importance_regularization_factor": 0.99988842434119, "compression/movement_sparsity/importance_threshold": -7.814463266267446e-07, "compression/movement_sparsity/linear_layer_sparsity": 0.9238214295798725, "compression/movement_sparsity/model_sparsity": 0.8920853214666676, "compression_loss": 105.58023071289062, "distillation_loss": 5.442292213439941, "epoch": 4.86, "learning_rate": 2.857142857142857e-05, "loss": 109.7506, "step": 5746, "task_loss": 3.8159613609313965 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9998903703840231, "compression/movement_sparsity/importance_threshold": -7.678167586759252e-07, "compression/movement_sparsity/linear_layer_sparsity": 0.9238004430448333, "compression/movement_sparsity/model_sparsity": 0.8920650558836692, "compression_loss": 105.57976531982422, "distillation_loss": 5.277232646942139, "epoch": 4.86, "learning_rate": 2.856673241288626e-05, "loss": 109.6366, "step": 5747, "task_loss": 1.6122894287109375 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9998922936663822, "compression/movement_sparsity/importance_threshold": -7.543465990493201e-07, "compression/movement_sparsity/linear_layer_sparsity": 0.92378215137168, "compression/movement_sparsity/model_sparsity": 0.8920473925857605, "compression_loss": 105.57936096191406, "distillation_loss": 4.456130504608154, "epoch": 4.86, "learning_rate": 2.856203625434395e-05, "loss": 109.0319, "step": 5748, "task_loss": 3.589682102203369 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9998941943221527, "compression/movement_sparsity/importance_threshold": -7.410349100499605e-07, "compression/movement_sparsity/linear_layer_sparsity": 0.9237086746507077, "compression/movement_sparsity/model_sparsity": 0.8919764400161945, "compression_loss": 105.57891845703125, "distillation_loss": 3.532195568084717, "epoch": 4.86, "learning_rate": 2.8557340095801637e-05, "loss": 109.139, "step": 5749, "task_loss": 2.792116641998291 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9998960724852195, "compression/movement_sparsity/importance_threshold": -7.278807539817453e-07, "compression/movement_sparsity/linear_layer_sparsity": 0.9237567409704479, "compression/movement_sparsity/model_sparsity": 0.8920228551099822, "compression_loss": 105.57845306396484, "distillation_loss": 5.264936447143555, "epoch": 4.86, "learning_rate": 2.855264393725932e-05, "loss": 109.5717, "step": 5750, "task_loss": 2.6098172664642334 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9998979282894678, "compression/movement_sparsity/importance_threshold": -7.148831931494404e-07, "compression/movement_sparsity/linear_layer_sparsity": 0.9238211910965197, "compression/movement_sparsity/model_sparsity": 0.8920850911759518, "compression_loss": 105.5780029296875, "distillation_loss": 4.24552059173584, "epoch": 4.86, "learning_rate": 2.854794777871701e-05, "loss": 109.4594, "step": 5751, "task_loss": 2.437236785888672 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9998997618687827, "compression/movement_sparsity/importance_threshold": -7.020412898569445e-07, "compression/movement_sparsity/linear_layer_sparsity": 0.9238234805367058, "compression/movement_sparsity/model_sparsity": 0.8920873019668244, "compression_loss": 105.5774917602539, "distillation_loss": 3.8099443912506104, "epoch": 4.86, "learning_rate": 2.85432516201747e-05, "loss": 109.8693, "step": 5752, "task_loss": 2.272874593734741 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9999015733570495, "compression/movement_sparsity/importance_threshold": -6.893541064072889e-07, "compression/movement_sparsity/linear_layer_sparsity": 0.9238787967503685, "compression/movement_sparsity/model_sparsity": 0.8921407178983752, "compression_loss": 105.57706451416016, "distillation_loss": 5.645594596862793, "epoch": 4.86, "learning_rate": 2.853855546163239e-05, "loss": 109.4767, "step": 5753, "task_loss": 2.3698890209198 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9999033628881532, "compression/movement_sparsity/importance_threshold": -6.76820705106107e-07, "compression/movement_sparsity/linear_layer_sparsity": 0.9238786059636864, "compression/movement_sparsity/model_sparsity": 0.8921405336658025, "compression_loss": 105.57659149169922, "distillation_loss": 3.5185301303863525, "epoch": 4.86, "learning_rate": 2.8533859303090072e-05, "loss": 108.9473, "step": 5754, "task_loss": 1.791491150856018 }, { "compression/movement_sparsity/importance_regularization_factor": 0.999905130595979, "compression/movement_sparsity/importance_threshold": -6.644401482555629e-07, "compression/movement_sparsity/linear_layer_sparsity": 0.9239113139555116, "compression/movement_sparsity/model_sparsity": 0.892172118037487, "compression_loss": 105.576171875, "distillation_loss": 4.260524749755859, "epoch": 4.86, "learning_rate": 2.852916314454776e-05, "loss": 109.6997, "step": 5755, "task_loss": 3.1400325298309326 }, { "compression/movement_sparsity/importance_regularization_factor": 0.999906876614412, "compression/movement_sparsity/importance_threshold": -6.522114981604224e-07, "compression/movement_sparsity/linear_layer_sparsity": 0.923898888972835, "compression/movement_sparsity/model_sparsity": 0.892160119891189, "compression_loss": 105.5757064819336, "distillation_loss": 4.541952610015869, "epoch": 4.87, "learning_rate": 2.8524466986005448e-05, "loss": 109.1445, "step": 5756, "task_loss": 2.5373404026031494 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9999086010773374, "compression/movement_sparsity/importance_threshold": -6.401338171254517e-07, "compression/movement_sparsity/linear_layer_sparsity": 0.9238775566369345, "compression/movement_sparsity/model_sparsity": 0.8921395203866526, "compression_loss": 105.57528686523438, "distillation_loss": 2.946751594543457, "epoch": 4.87, "learning_rate": 2.8519770827463138e-05, "loss": 109.086, "step": 5757, "task_loss": 1.5726948976516724 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9999103041186402, "compression/movement_sparsity/importance_threshold": -6.282061674536821e-07, "compression/movement_sparsity/linear_layer_sparsity": 0.9238479847011976, "compression/movement_sparsity/model_sparsity": 0.8921109643378822, "compression_loss": 105.57479858398438, "distillation_loss": 4.083705902099609, "epoch": 4.87, "learning_rate": 2.851507466892082e-05, "loss": 109.5793, "step": 5758, "task_loss": 3.4338252544403076 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9999119858722058, "compression/movement_sparsity/importance_threshold": -6.164276114481448e-07, "compression/movement_sparsity/linear_layer_sparsity": 0.9237985351780116, "compression/movement_sparsity/model_sparsity": 0.8920632135579422, "compression_loss": 105.57438659667969, "distillation_loss": 4.285153388977051, "epoch": 4.87, "learning_rate": 2.851037851037851e-05, "loss": 109.9572, "step": 5759, "task_loss": 3.281860828399658 }, { "compression/movement_sparsity/importance_regularization_factor": 0.999913646471919, "compression/movement_sparsity/importance_threshold": -6.047972114153408e-07, "compression/movement_sparsity/linear_layer_sparsity": 0.9238135954017357, "compression/movement_sparsity/model_sparsity": 0.8920777564166507, "compression_loss": 105.57390594482422, "distillation_loss": 4.781952857971191, "epoch": 4.87, "learning_rate": 2.85056823518362e-05, "loss": 109.593, "step": 5760, "task_loss": 2.2279229164123535 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9999152860516652, "compression/movement_sparsity/importance_threshold": -5.933140296574338e-07, "compression/movement_sparsity/linear_layer_sparsity": 0.923847150009463, "compression/movement_sparsity/model_sparsity": 0.8921101583203765, "compression_loss": 105.57349395751953, "distillation_loss": 4.786239147186279, "epoch": 4.87, "learning_rate": 2.850098619329389e-05, "loss": 110.3997, "step": 5761, "task_loss": 2.6992626190185547 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9999169047453296, "compression/movement_sparsity/importance_threshold": -5.819771284783226e-07, "compression/movement_sparsity/linear_layer_sparsity": 0.9238868574876904, "compression/movement_sparsity/model_sparsity": 0.8921485017245723, "compression_loss": 105.57303619384766, "distillation_loss": 3.549647331237793, "epoch": 4.87, "learning_rate": 2.8496290034751576e-05, "loss": 109.2187, "step": 5762, "task_loss": 1.4663758277893066 }, { "compression/movement_sparsity/importance_regularization_factor": 0.999918502686797, "compression/movement_sparsity/importance_threshold": -5.707855701827733e-07, "compression/movement_sparsity/linear_layer_sparsity": 0.9239481238610036, "compression/movement_sparsity/model_sparsity": 0.8922076634094847, "compression_loss": 105.57261657714844, "distillation_loss": 3.6878225803375244, "epoch": 4.87, "learning_rate": 2.849159387620926e-05, "loss": 109.7149, "step": 5763, "task_loss": 2.080394983291626 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9999200800099528, "compression/movement_sparsity/importance_threshold": -5.597384170746844e-07, "compression/movement_sparsity/linear_layer_sparsity": 0.9239817261654014, "compression/movement_sparsity/model_sparsity": 0.8922401113713537, "compression_loss": 105.57218170166016, "distillation_loss": 3.0918312072753906, "epoch": 4.87, "learning_rate": 2.848689771766695e-05, "loss": 108.8809, "step": 5764, "task_loss": 1.2333855628967285 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9999216368486822, "compression/movement_sparsity/importance_threshold": -5.488347314570874e-07, "compression/movement_sparsity/linear_layer_sparsity": 0.924062488552799, "compression/movement_sparsity/model_sparsity": 0.8923180993222901, "compression_loss": 105.57167053222656, "distillation_loss": 4.3989715576171875, "epoch": 4.87, "learning_rate": 2.848220155912464e-05, "loss": 109.465, "step": 5765, "task_loss": 3.06538987159729 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9999231733368701, "compression/movement_sparsity/importance_threshold": -5.380735756356156e-07, "compression/movement_sparsity/linear_layer_sparsity": 0.924017892165841, "compression/movement_sparsity/model_sparsity": 0.8922750349584185, "compression_loss": 105.57122802734375, "distillation_loss": 5.130695819854736, "epoch": 4.87, "learning_rate": 2.8477505400582328e-05, "loss": 110.0723, "step": 5766, "task_loss": 2.618840456008911 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9999246896084018, "compression/movement_sparsity/importance_threshold": -5.274540119133003e-07, "compression/movement_sparsity/linear_layer_sparsity": 0.9240098791251896, "compression/movement_sparsity/model_sparsity": 0.8922672971903646, "compression_loss": 105.57072448730469, "distillation_loss": 4.058493137359619, "epoch": 4.87, "learning_rate": 2.847280924204001e-05, "loss": 109.6253, "step": 5767, "task_loss": 2.040745258331299 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9999261857971624, "compression/movement_sparsity/importance_threshold": -5.169751025940403e-07, "compression/movement_sparsity/linear_layer_sparsity": 0.9239878671117339, "compression/movement_sparsity/model_sparsity": 0.8922460413572879, "compression_loss": 105.57025146484375, "distillation_loss": 3.8542556762695312, "epoch": 4.88, "learning_rate": 2.84681130834977e-05, "loss": 109.5365, "step": 5768, "task_loss": 2.327390670776367 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9999276620370371, "compression/movement_sparsity/importance_threshold": -5.066359099817341e-07, "compression/movement_sparsity/linear_layer_sparsity": 0.9239674767850765, "compression/movement_sparsity/model_sparsity": 0.8922263515010792, "compression_loss": 105.56973266601562, "distillation_loss": 4.185609817504883, "epoch": 4.88, "learning_rate": 2.8463416924955387e-05, "loss": 109.5494, "step": 5769, "task_loss": 2.3065922260284424 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9999291184619109, "compression/movement_sparsity/importance_threshold": -4.964354963811479e-07, "compression/movement_sparsity/linear_layer_sparsity": 0.9240229837854215, "compression/movement_sparsity/model_sparsity": 0.8922799516652028, "compression_loss": 105.56925201416016, "distillation_loss": 3.783383846282959, "epoch": 4.88, "learning_rate": 2.8458720766413077e-05, "loss": 109.0953, "step": 5770, "task_loss": 1.3077412843704224 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9999305552056691, "compression/movement_sparsity/importance_threshold": -4.863729240944456e-07, "compression/movement_sparsity/linear_layer_sparsity": 0.9240214455677964, "compression/movement_sparsity/model_sparsity": 0.8922784662900853, "compression_loss": 105.56871032714844, "distillation_loss": 4.315218925476074, "epoch": 4.88, "learning_rate": 2.845402460787076e-05, "loss": 109.6265, "step": 5771, "task_loss": 2.119558095932007 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9999319724021967, "compression/movement_sparsity/importance_threshold": -4.7644725542812794e-07, "compression/movement_sparsity/linear_layer_sparsity": 0.9240678544282352, "compression/movement_sparsity/model_sparsity": 0.8923232808633976, "compression_loss": 105.56824493408203, "distillation_loss": 3.453381299972534, "epoch": 4.88, "learning_rate": 2.844932844932845e-05, "loss": 109.5374, "step": 5772, "task_loss": 2.280989170074463 }, { "compression/movement_sparsity/importance_regularization_factor": 0.999933370185379, "compression/movement_sparsity/importance_threshold": -4.666575526843589e-07, "compression/movement_sparsity/linear_layer_sparsity": 0.9240410488993898, "compression/movement_sparsity/model_sparsity": 0.8922973961869315, "compression_loss": 105.56769561767578, "distillation_loss": 3.2475063800811768, "epoch": 4.88, "learning_rate": 2.844463229078614e-05, "loss": 108.978, "step": 5773, "task_loss": 1.432131290435791 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9999347486891009, "compression/movement_sparsity/importance_threshold": -4.5700287816790447e-07, "compression/movement_sparsity/linear_layer_sparsity": 0.9240283735091929, "compression/movement_sparsity/model_sparsity": 0.892285156235382, "compression_loss": 105.56719970703125, "distillation_loss": 4.007950782775879, "epoch": 4.88, "learning_rate": 2.843993613224383e-05, "loss": 108.9299, "step": 5774, "task_loss": 1.9063670635223389 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9999361080472479, "compression/movement_sparsity/importance_threshold": -4.47482294181796e-07, "compression/movement_sparsity/linear_layer_sparsity": 0.9240646587513088, "compression/movement_sparsity/model_sparsity": 0.8923201949678047, "compression_loss": 105.5666732788086, "distillation_loss": 4.3854804039001465, "epoch": 4.88, "learning_rate": 2.8435239973701515e-05, "loss": 109.769, "step": 5775, "task_loss": 2.359445333480835 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9999374483937048, "compression/movement_sparsity/importance_threshold": -4.380948630307996e-07, "compression/movement_sparsity/linear_layer_sparsity": 0.9241024464385469, "compression/movement_sparsity/model_sparsity": 0.8923566845317377, "compression_loss": 105.56620025634766, "distillation_loss": 4.397212982177734, "epoch": 4.88, "learning_rate": 2.84305438151592e-05, "loss": 109.9174, "step": 5776, "task_loss": 3.209820032119751 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9999387698623569, "compression/movement_sparsity/importance_threshold": -4.288396470196812e-07, "compression/movement_sparsity/linear_layer_sparsity": 0.9241573810788453, "compression/movement_sparsity/model_sparsity": 0.8924097319981431, "compression_loss": 105.56568908691406, "distillation_loss": 2.1097731590270996, "epoch": 4.88, "learning_rate": 2.8425847656616888e-05, "loss": 108.9489, "step": 5777, "task_loss": 2.133603096008301 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9999400725870893, "compression/movement_sparsity/importance_threshold": -4.197157084506048e-07, "compression/movement_sparsity/linear_layer_sparsity": 0.9241647621386119, "compression/movement_sparsity/model_sparsity": 0.8924168594957999, "compression_loss": 105.5652084350586, "distillation_loss": 4.154838562011719, "epoch": 4.88, "learning_rate": 2.8421151498074578e-05, "loss": 109.4296, "step": 5778, "task_loss": 3.022413730621338 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9999413567017872, "compression/movement_sparsity/importance_threshold": -4.1072210962833644e-07, "compression/movement_sparsity/linear_layer_sparsity": 0.9241285007448312, "compression/movement_sparsity/model_sparsity": 0.8923818437924487, "compression_loss": 105.5647201538086, "distillation_loss": 3.4539031982421875, "epoch": 4.88, "learning_rate": 2.8416455339532268e-05, "loss": 108.6337, "step": 5779, "task_loss": 2.017198324203491 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9999426223403356, "compression/movement_sparsity/importance_threshold": -4.018579128585095e-07, "compression/movement_sparsity/linear_layer_sparsity": 0.924114000956986, "compression/movement_sparsity/model_sparsity": 0.8923678421169225, "compression_loss": 105.56423950195312, "distillation_loss": 4.885636329650879, "epoch": 4.89, "learning_rate": 2.841175918098995e-05, "loss": 109.6398, "step": 5780, "task_loss": 2.961885690689087 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9999438696366199, "compression/movement_sparsity/importance_threshold": -3.931221804415533e-07, "compression/movement_sparsity/linear_layer_sparsity": 0.924143143622688, "compression/movement_sparsity/model_sparsity": 0.8923959836424044, "compression_loss": 105.5636978149414, "distillation_loss": 2.696073532104492, "epoch": 4.89, "learning_rate": 2.840706302244764e-05, "loss": 109.4633, "step": 5781, "task_loss": 2.0201523303985596 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9999450987245249, "compression/movement_sparsity/importance_threshold": -3.8451397468483584e-07, "compression/movement_sparsity/linear_layer_sparsity": 0.9241687448106023, "compression/movement_sparsity/model_sparsity": 0.8924207053507552, "compression_loss": 105.56317138671875, "distillation_loss": 5.161613464355469, "epoch": 4.89, "learning_rate": 2.8402366863905327e-05, "loss": 110.2486, "step": 5782, "task_loss": 2.3222835063934326 }, { "compression/movement_sparsity/importance_regularization_factor": 0.999946309737936, "compression/movement_sparsity/importance_threshold": -3.7603235789052114e-07, "compression/movement_sparsity/linear_layer_sparsity": 0.9241447056886484, "compression/movement_sparsity/model_sparsity": 0.8923974920465935, "compression_loss": 105.56259155273438, "distillation_loss": 4.012014389038086, "epoch": 4.89, "learning_rate": 2.8397670705363016e-05, "loss": 109.468, "step": 5783, "task_loss": 2.495551586151123 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9999475028107382, "compression/movement_sparsity/importance_threshold": -3.6767639236337524e-07, "compression/movement_sparsity/linear_layer_sparsity": 0.9241288823181956, "compression/movement_sparsity/model_sparsity": 0.8923822122575942, "compression_loss": 105.56210327148438, "distillation_loss": 3.2486085891723633, "epoch": 4.89, "learning_rate": 2.83929745468207e-05, "loss": 109.3132, "step": 5784, "task_loss": 2.2157177925109863 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9999486780768168, "compression/movement_sparsity/importance_threshold": -3.594451404072968e-07, "compression/movement_sparsity/linear_layer_sparsity": 0.9240917385360098, "compression/movement_sparsity/model_sparsity": 0.8923463444785942, "compression_loss": 105.56159973144531, "distillation_loss": 4.397359848022461, "epoch": 4.89, "learning_rate": 2.838827838827839e-05, "loss": 109.1712, "step": 5785, "task_loss": 2.541592597961426 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9999498356700568, "compression/movement_sparsity/importance_threshold": -3.513376643244498e-07, "compression/movement_sparsity/linear_layer_sparsity": 0.9241066556697223, "compression/movement_sparsity/model_sparsity": 0.8923607491628731, "compression_loss": 105.56108093261719, "distillation_loss": 5.419126510620117, "epoch": 4.89, "learning_rate": 2.838358222973608e-05, "loss": 110.1396, "step": 5786, "task_loss": 3.4935543537139893 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9999509757243433, "compression/movement_sparsity/importance_threshold": -3.4335302642220233e-07, "compression/movement_sparsity/linear_layer_sparsity": 0.9240641460121004, "compression/movement_sparsity/model_sparsity": 0.8923196998427656, "compression_loss": 105.56047821044922, "distillation_loss": 4.754867076873779, "epoch": 4.89, "learning_rate": 2.8378886071193765e-05, "loss": 109.0265, "step": 5787, "task_loss": 2.8066933155059814 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9999520983735616, "compression/movement_sparsity/importance_threshold": -3.35490289001851e-07, "compression/movement_sparsity/linear_layer_sparsity": 0.9240720159627401, "compression/movement_sparsity/model_sparsity": 0.89232729943639, "compression_loss": 105.55996704101562, "distillation_loss": 4.321389675140381, "epoch": 4.89, "learning_rate": 2.837418991265145e-05, "loss": 109.6211, "step": 5788, "task_loss": 1.9351894855499268 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9999532037515967, "compression/movement_sparsity/importance_threshold": -3.277485143681619e-07, "compression/movement_sparsity/linear_layer_sparsity": 0.9241102209958454, "compression/movement_sparsity/model_sparsity": 0.8923641920090757, "compression_loss": 105.5594253540039, "distillation_loss": 4.105820178985596, "epoch": 4.89, "learning_rate": 2.8369493754109138e-05, "loss": 109.2398, "step": 5789, "task_loss": 1.4320447444915771 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9999542919923339, "compression/movement_sparsity/importance_threshold": -3.201267648250336e-07, "compression/movement_sparsity/linear_layer_sparsity": 0.9241332704118855, "compression/movement_sparsity/model_sparsity": 0.8923864496067665, "compression_loss": 105.55889892578125, "distillation_loss": 3.9293527603149414, "epoch": 4.89, "learning_rate": 2.8364797595566827e-05, "loss": 109.7271, "step": 5790, "task_loss": 3.0048444271087646 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9999553632296582, "compression/movement_sparsity/importance_threshold": -3.1262410267723223e-07, "compression/movement_sparsity/linear_layer_sparsity": 0.924100836675916, "compression/movement_sparsity/model_sparsity": 0.8923551300694054, "compression_loss": 105.55841827392578, "distillation_loss": 5.304651260375977, "epoch": 4.89, "learning_rate": 2.8360101437024517e-05, "loss": 109.4457, "step": 5791, "task_loss": 3.111219644546509 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9999564175974547, "compression/movement_sparsity/importance_threshold": -3.0523959022778907e-07, "compression/movement_sparsity/linear_layer_sparsity": 0.924108074645671, "compression/movement_sparsity/model_sparsity": 0.8923621193926327, "compression_loss": 105.55789184570312, "distillation_loss": 5.228198051452637, "epoch": 4.9, "learning_rate": 2.8355405278482207e-05, "loss": 109.8292, "step": 5792, "task_loss": 2.5225722789764404 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9999574552296088, "compression/movement_sparsity/importance_threshold": -2.979722897806028e-07, "compression/movement_sparsity/linear_layer_sparsity": 0.9241846278018933, "compression/movement_sparsity/model_sparsity": 0.8924360427124336, "compression_loss": 105.557373046875, "distillation_loss": 4.680537700653076, "epoch": 4.9, "learning_rate": 2.835070911993989e-05, "loss": 109.5083, "step": 5793, "task_loss": 3.023618698120117 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9999584762600052, "compression/movement_sparsity/importance_threshold": -2.9082126364130684e-07, "compression/movement_sparsity/linear_layer_sparsity": 0.9241648813802882, "compression/movement_sparsity/model_sparsity": 0.8924169746411579, "compression_loss": 105.55689239501953, "distillation_loss": 3.011064291000366, "epoch": 4.9, "learning_rate": 2.8346012961397576e-05, "loss": 109.1844, "step": 5794, "task_loss": 2.1663336753845215 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9999594808225295, "compression/movement_sparsity/importance_threshold": -2.8378557411119776e-07, "compression/movement_sparsity/linear_layer_sparsity": 0.9241316725734223, "compression/movement_sparsity/model_sparsity": 0.8923849066589701, "compression_loss": 105.55635833740234, "distillation_loss": 5.132295608520508, "epoch": 4.9, "learning_rate": 2.8341316802855266e-05, "loss": 110.3995, "step": 5795, "task_loss": 2.7465851306915283 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9999604690510666, "compression/movement_sparsity/importance_threshold": -2.7686428349590897e-07, "compression/movement_sparsity/linear_layer_sparsity": 0.9241634027835014, "compression/movement_sparsity/model_sparsity": 0.8924155468387193, "compression_loss": 105.55585479736328, "distillation_loss": 4.136115550994873, "epoch": 4.9, "learning_rate": 2.8336620644312956e-05, "loss": 109.5348, "step": 5796, "task_loss": 2.054055690765381 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9999614410795017, "compression/movement_sparsity/importance_threshold": -2.700564541002065e-07, "compression/movement_sparsity/linear_layer_sparsity": 0.924181134020776, "compression/movement_sparsity/model_sparsity": 0.8924326689534458, "compression_loss": 105.55535888671875, "distillation_loss": 4.318592071533203, "epoch": 4.9, "learning_rate": 2.833192448577064e-05, "loss": 108.8359, "step": 5797, "task_loss": 1.6425416469573975 }, { "compression/movement_sparsity/importance_regularization_factor": 0.99996239704172, "compression/movement_sparsity/importance_threshold": -2.63361148225387e-07, "compression/movement_sparsity/linear_layer_sparsity": 0.9241325907343303, "compression/movement_sparsity/model_sparsity": 0.8923857932782262, "compression_loss": 105.55484771728516, "distillation_loss": 4.442142963409424, "epoch": 4.9, "learning_rate": 2.8327228327228328e-05, "loss": 109.8726, "step": 5798, "task_loss": 2.545729160308838 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9999633370716063, "compression/movement_sparsity/importance_threshold": -2.567774281788185e-07, "compression/movement_sparsity/linear_layer_sparsity": 0.924181885243337, "compression/movement_sparsity/model_sparsity": 0.8924333943692009, "compression_loss": 105.55436706542969, "distillation_loss": 3.740147113800049, "epoch": 4.9, "learning_rate": 2.8322532168686018e-05, "loss": 110.0629, "step": 5799, "task_loss": 1.9118313789367676 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9999642613030463, "compression/movement_sparsity/importance_threshold": -2.5030435626179764e-07, "compression/movement_sparsity/linear_layer_sparsity": 0.9241856771286452, "compression/movement_sparsity/model_sparsity": 0.8924370559915835, "compression_loss": 105.55387878417969, "distillation_loss": 3.5427160263061523, "epoch": 4.9, "learning_rate": 2.8317836010143704e-05, "loss": 109.1896, "step": 5800, "task_loss": 1.6276402473449707 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9999651698699247, "compression/movement_sparsity/importance_threshold": -2.439409947790905e-07, "compression/movement_sparsity/linear_layer_sparsity": 0.9241784868555607, "compression/movement_sparsity/model_sparsity": 0.8924301127264994, "compression_loss": 105.55339813232422, "distillation_loss": 4.611199378967285, "epoch": 4.9, "learning_rate": 2.831313985160139e-05, "loss": 109.7936, "step": 5801, "task_loss": 3.32287335395813 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9999660629061268, "compression/movement_sparsity/importance_threshold": -2.3768640603546304e-07, "compression/movement_sparsity/linear_layer_sparsity": 0.924137610808905, "compression/movement_sparsity/model_sparsity": 0.8923906408977957, "compression_loss": 105.55297088623047, "distillation_loss": 3.189418077468872, "epoch": 4.9, "learning_rate": 2.8308443693059077e-05, "loss": 110.0742, "step": 5802, "task_loss": 3.178426742553711 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9999669405455378, "compression/movement_sparsity/importance_threshold": -2.3153965233394663e-07, "compression/movement_sparsity/linear_layer_sparsity": 0.924112951630234, "compression/movement_sparsity/model_sparsity": 0.8923668288377726, "compression_loss": 105.55245971679688, "distillation_loss": 4.0032148361206055, "epoch": 4.9, "learning_rate": 2.8303747534516767e-05, "loss": 109.4448, "step": 5803, "task_loss": 2.4817261695861816 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9999678029220427, "compression/movement_sparsity/importance_threshold": -2.2549979597843994e-07, "compression/movement_sparsity/linear_layer_sparsity": 0.9241331034735386, "compression/movement_sparsity/model_sparsity": 0.8923862884032654, "compression_loss": 105.55204010009766, "distillation_loss": 3.3149986267089844, "epoch": 4.91, "learning_rate": 2.8299051375974456e-05, "loss": 109.5433, "step": 5804, "task_loss": 2.5761148929595947 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9999686501695267, "compression/movement_sparsity/importance_threshold": -2.19565899273709e-07, "compression/movement_sparsity/linear_layer_sparsity": 0.9241516336300448, "compression/movement_sparsity/model_sparsity": 0.8924041819918901, "compression_loss": 105.5515365600586, "distillation_loss": 5.790671348571777, "epoch": 4.91, "learning_rate": 2.829435521743214e-05, "loss": 110.3215, "step": 5805, "task_loss": 2.727064371109009 }, { "compression/movement_sparsity/importance_regularization_factor": 0.999969482421875, "compression/movement_sparsity/importance_threshold": -2.137370245236525e-07, "compression/movement_sparsity/linear_layer_sparsity": 0.9241480683039217, "compression/movement_sparsity/model_sparsity": 0.8924007391456875, "compression_loss": 105.55103302001953, "distillation_loss": 2.8517465591430664, "epoch": 4.91, "learning_rate": 2.828965905888983e-05, "loss": 109.1225, "step": 5806, "task_loss": 2.2918078899383545 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9999702998129727, "compression/movement_sparsity/importance_threshold": -2.0801223403130176e-07, "compression/movement_sparsity/linear_layer_sparsity": 0.9241553539703472, "compression/movement_sparsity/model_sparsity": 0.892407774527058, "compression_loss": 105.55044555664062, "distillation_loss": 4.233865737915039, "epoch": 4.91, "learning_rate": 2.8284962900347516e-05, "loss": 109.2089, "step": 5807, "task_loss": 2.0620834827423096 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9999711024767048, "compression/movement_sparsity/importance_threshold": -2.0239059010229016e-07, "compression/movement_sparsity/linear_layer_sparsity": 0.9242205433948124, "compression/movement_sparsity/model_sparsity": 0.8924707244942468, "compression_loss": 105.5499038696289, "distillation_loss": 4.127224922180176, "epoch": 4.91, "learning_rate": 2.8280266741805205e-05, "loss": 109.7811, "step": 5808, "task_loss": 1.9554705619812012 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9999718905469568, "compression/movement_sparsity/importance_threshold": -1.9687115503878166e-07, "compression/movement_sparsity/linear_layer_sparsity": 0.9242476470278488, "compression/movement_sparsity/model_sparsity": 0.8924968970341077, "compression_loss": 105.54935455322266, "distillation_loss": 5.232905387878418, "epoch": 4.91, "learning_rate": 2.8275570583262895e-05, "loss": 109.798, "step": 5809, "task_loss": 3.2712509632110596 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9999726641576134, "compression/movement_sparsity/importance_threshold": -1.9145299114554232e-07, "compression/movement_sparsity/linear_layer_sparsity": 0.9243031898006966, "compression/movement_sparsity/model_sparsity": 0.8925505317418387, "compression_loss": 105.54874420166016, "distillation_loss": 4.367021560668945, "epoch": 4.91, "learning_rate": 2.8270874424720578e-05, "loss": 109.8634, "step": 5810, "task_loss": 2.90411376953125 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9999734234425601, "compression/movement_sparsity/importance_threshold": -1.861351607264708e-07, "compression/movement_sparsity/linear_layer_sparsity": 0.9242878076244463, "compression/movement_sparsity/model_sparsity": 0.8925356779906638, "compression_loss": 105.54820251464844, "distillation_loss": 4.384200096130371, "epoch": 4.91, "learning_rate": 2.8266178266178268e-05, "loss": 110.0096, "step": 5811, "task_loss": 2.4015374183654785 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9999741685356818, "compression/movement_sparsity/importance_threshold": -1.8091672608633314e-07, "compression/movement_sparsity/linear_layer_sparsity": 0.9242968222951791, "compression/movement_sparsity/model_sparsity": 0.8925443829797244, "compression_loss": 105.54763793945312, "distillation_loss": 2.603367567062378, "epoch": 4.91, "learning_rate": 2.8261482107635957e-05, "loss": 109.0088, "step": 5812, "task_loss": 1.191414475440979 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9999748995708638, "compression/movement_sparsity/importance_threshold": -1.757967495272933e-07, "compression/movement_sparsity/linear_layer_sparsity": 0.924330615386259, "compression/movement_sparsity/model_sparsity": 0.8925770151741662, "compression_loss": 105.54710388183594, "distillation_loss": 2.8490684032440186, "epoch": 4.91, "learning_rate": 2.8256785949093644e-05, "loss": 109.0368, "step": 5813, "task_loss": 2.2161474227905273 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9999756166819911, "compression/movement_sparsity/importance_threshold": -1.7077429335585204e-07, "compression/movement_sparsity/linear_layer_sparsity": 0.9243826286054867, "compression/movement_sparsity/model_sparsity": 0.892627241579302, "compression_loss": 105.54655456542969, "distillation_loss": 3.742361545562744, "epoch": 4.91, "learning_rate": 2.8252089790551327e-05, "loss": 109.2722, "step": 5814, "task_loss": 2.342648983001709 }, { "compression/movement_sparsity/importance_regularization_factor": 0.999976320002949, "compression/movement_sparsity/importance_threshold": -1.6584841987417331e-07, "compression/movement_sparsity/linear_layer_sparsity": 0.9244133810338196, "compression/movement_sparsity/model_sparsity": 0.8926569375671161, "compression_loss": 105.54597473144531, "distillation_loss": 4.044370651245117, "epoch": 4.91, "learning_rate": 2.8247393632009016e-05, "loss": 108.9997, "step": 5815, "task_loss": 2.2245523929595947 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9999770096676225, "compression/movement_sparsity/importance_threshold": -1.610181913861558e-07, "compression/movement_sparsity/linear_layer_sparsity": 0.9243723976696552, "compression/movement_sparsity/model_sparsity": 0.8926173621075902, "compression_loss": 105.54549407958984, "distillation_loss": 4.161084175109863, "epoch": 4.92, "learning_rate": 2.8242697473466706e-05, "loss": 109.3121, "step": 5816, "task_loss": 1.9251513481140137 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9999776858098969, "compression/movement_sparsity/importance_threshold": -1.5628267019656555e-07, "compression/movement_sparsity/linear_layer_sparsity": 0.924418377260059, "compression/movement_sparsity/model_sparsity": 0.892661762157614, "compression_loss": 105.54485321044922, "distillation_loss": 3.7956342697143555, "epoch": 4.92, "learning_rate": 2.8238001314924396e-05, "loss": 109.4383, "step": 5817, "task_loss": 2.2154626846313477 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9999783485636571, "compression/movement_sparsity/importance_threshold": -1.5164091860930123e-07, "compression/movement_sparsity/linear_layer_sparsity": 0.9243449720840927, "compression/movement_sparsity/model_sparsity": 0.8925908786752628, "compression_loss": 105.54429626464844, "distillation_loss": 4.210668563842773, "epoch": 4.92, "learning_rate": 2.823330515638208e-05, "loss": 109.3034, "step": 5818, "task_loss": 1.7352038621902466 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9999789980627884, "compression/movement_sparsity/importance_threshold": -1.4709199892739416e-07, "compression/movement_sparsity/linear_layer_sparsity": 0.9243694047035785, "compression/movement_sparsity/model_sparsity": 0.8926144719591058, "compression_loss": 105.54365539550781, "distillation_loss": 4.7040205001831055, "epoch": 4.92, "learning_rate": 2.822860899783977e-05, "loss": 109.8601, "step": 5819, "task_loss": 2.434617757797241 }, { "compression/movement_sparsity/importance_regularization_factor": 0.999979634441176, "compression/movement_sparsity/importance_threshold": -1.4263497345647774e-07, "compression/movement_sparsity/linear_layer_sparsity": 0.9243058846625823, "compression/movement_sparsity/model_sparsity": 0.8925531340269283, "compression_loss": 105.54315185546875, "distillation_loss": 4.129504203796387, "epoch": 4.92, "learning_rate": 2.8223912839297455e-05, "loss": 109.5991, "step": 5820, "task_loss": 3.0206573009490967 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9999802578327048, "compression/movement_sparsity/importance_threshold": -1.382689044995833e-07, "compression/movement_sparsity/linear_layer_sparsity": 0.9243949701189899, "compression/movement_sparsity/model_sparsity": 0.8926391591238493, "compression_loss": 105.54249572753906, "distillation_loss": 4.849095344543457, "epoch": 4.92, "learning_rate": 2.8219216680755145e-05, "loss": 109.2778, "step": 5821, "task_loss": 3.299720525741577 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9999808683712603, "compression/movement_sparsity/importance_threshold": -1.3399285435974212e-07, "compression/movement_sparsity/linear_layer_sparsity": 0.924425352898126, "compression/movement_sparsity/model_sparsity": 0.8926684981610538, "compression_loss": 105.54194641113281, "distillation_loss": 3.5940475463867188, "epoch": 4.92, "learning_rate": 2.8214520522212834e-05, "loss": 109.2517, "step": 5822, "task_loss": 2.327202796936035 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9999814661907274, "compression/movement_sparsity/importance_threshold": -1.2980588534258763e-07, "compression/movement_sparsity/linear_layer_sparsity": 0.9244232423204545, "compression/movement_sparsity/model_sparsity": 0.8926664600882181, "compression_loss": 105.54135131835938, "distillation_loss": 4.946146488189697, "epoch": 4.92, "learning_rate": 2.8209824363670517e-05, "loss": 109.588, "step": 5823, "task_loss": 3.677462339401245 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9999820514249912, "compression/movement_sparsity/importance_threshold": -1.2570705975115115e-07, "compression/movement_sparsity/linear_layer_sparsity": 0.9243963175499327, "compression/movement_sparsity/model_sparsity": 0.8926404602663941, "compression_loss": 105.54085540771484, "distillation_loss": 3.913628101348877, "epoch": 4.92, "learning_rate": 2.8205128205128207e-05, "loss": 109.2946, "step": 5824, "task_loss": 2.3776330947875977 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9999826242079369, "compression/movement_sparsity/importance_threshold": -1.216954398901987e-07, "compression/movement_sparsity/linear_layer_sparsity": 0.9244144303605716, "compression/movement_sparsity/model_sparsity": 0.892657950846266, "compression_loss": 105.54023742675781, "distillation_loss": 4.738117694854736, "epoch": 4.92, "learning_rate": 2.8200432046585897e-05, "loss": 109.4187, "step": 5825, "task_loss": 2.7193286418914795 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9999831846734497, "compression/movement_sparsity/importance_threshold": -1.1777008806276162e-07, "compression/movement_sparsity/linear_layer_sparsity": 0.9244156227773351, "compression/movement_sparsity/model_sparsity": 0.8926591022998455, "compression_loss": 105.5396957397461, "distillation_loss": 4.396115303039551, "epoch": 4.92, "learning_rate": 2.8195735888043583e-05, "loss": 109.6467, "step": 5826, "task_loss": 2.877631425857544 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9999837329554148, "compression/movement_sparsity/importance_threshold": -1.1393006657360594e-07, "compression/movement_sparsity/linear_layer_sparsity": 0.9243864204907949, "compression/movement_sparsity/model_sparsity": 0.8926309032016846, "compression_loss": 105.53909301757812, "distillation_loss": 3.7712345123291016, "epoch": 4.93, "learning_rate": 2.8191039729501266e-05, "loss": 109.2341, "step": 5827, "task_loss": 2.227628469467163 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9999842691877171, "compression/movement_sparsity/importance_threshold": -1.1017443772576296e-07, "compression/movement_sparsity/linear_layer_sparsity": 0.9244111512144717, "compression/movement_sparsity/model_sparsity": 0.8926547843489225, "compression_loss": 105.53858184814453, "distillation_loss": 3.462930202484131, "epoch": 4.93, "learning_rate": 2.8186343570958956e-05, "loss": 109.922, "step": 5828, "task_loss": 1.478043556213379 }, { "compression/movement_sparsity/importance_regularization_factor": 0.999984793504242, "compression/movement_sparsity/importance_threshold": -1.065022638248661e-07, "compression/movement_sparsity/linear_layer_sparsity": 0.9244567492315112, "compression/movement_sparsity/model_sparsity": 0.8926988159338008, "compression_loss": 105.5379867553711, "distillation_loss": 3.968722105026245, "epoch": 4.93, "learning_rate": 2.8181647412416645e-05, "loss": 109.1859, "step": 5829, "task_loss": 2.567662239074707 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9999853060388745, "compression/movement_sparsity/importance_threshold": -1.0291260717221196e-07, "compression/movement_sparsity/linear_layer_sparsity": 0.924458263600801, "compression/movement_sparsity/model_sparsity": 0.8927002782798468, "compression_loss": 105.53746032714844, "distillation_loss": 5.396866798400879, "epoch": 4.93, "learning_rate": 2.8176951253874335e-05, "loss": 110.2105, "step": 5830, "task_loss": 3.264470338821411 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9999858069254998, "compression/movement_sparsity/importance_threshold": -9.940453007430128e-08, "compression/movement_sparsity/linear_layer_sparsity": 0.9244797509508809, "compression/movement_sparsity/model_sparsity": 0.8927210274733485, "compression_loss": 105.53691101074219, "distillation_loss": 5.850165843963623, "epoch": 4.93, "learning_rate": 2.8172255095332018e-05, "loss": 109.6238, "step": 5831, "task_loss": 2.836446523666382 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9999862962980028, "compression/movement_sparsity/importance_threshold": -9.597709483503275e-08, "compression/movement_sparsity/linear_layer_sparsity": 0.9244462082673212, "compression/movement_sparsity/model_sparsity": 0.8926886370841585, "compression_loss": 105.53641510009766, "distillation_loss": 2.696895122528076, "epoch": 4.93, "learning_rate": 2.8167558936789708e-05, "loss": 108.5432, "step": 5832, "task_loss": 2.3448662757873535 }, { "compression/movement_sparsity/importance_regularization_factor": 0.999986774290269, "compression/movement_sparsity/importance_threshold": -9.262936375657033e-08, "compression/movement_sparsity/linear_layer_sparsity": 0.9244630094195201, "compression/movement_sparsity/model_sparsity": 0.892704861065093, "compression_loss": 105.53585815429688, "distillation_loss": 2.3151326179504395, "epoch": 4.93, "learning_rate": 2.8162862778247394e-05, "loss": 109.1485, "step": 5833, "task_loss": 0.9078324437141418 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9999872410361835, "compression/movement_sparsity/importance_threshold": -8.936039914368005e-08, "compression/movement_sparsity/linear_layer_sparsity": 0.9244384217858549, "compression/movement_sparsity/model_sparsity": 0.8926811180922846, "compression_loss": 105.53533172607422, "distillation_loss": 4.262421607971191, "epoch": 4.93, "learning_rate": 2.8158166619705084e-05, "loss": 109.3855, "step": 5834, "task_loss": 2.0490643978118896 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9999876966696312, "compression/movement_sparsity/importance_threshold": -8.616926330112795e-08, "compression/movement_sparsity/linear_layer_sparsity": 0.9244081463242275, "compression/movement_sparsity/model_sparsity": 0.8926518826859022, "compression_loss": 105.53484344482422, "distillation_loss": 3.390775203704834, "epoch": 4.93, "learning_rate": 2.8153470461162767e-05, "loss": 109.3964, "step": 5835, "task_loss": 1.9313862323760986 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9999881413244974, "compression/movement_sparsity/importance_threshold": -8.305501853194536e-08, "compression/movement_sparsity/linear_layer_sparsity": 0.9244277496558209, "compression/movement_sparsity/model_sparsity": 0.8926708125827485, "compression_loss": 105.53431701660156, "distillation_loss": 4.898916244506836, "epoch": 4.93, "learning_rate": 2.8148774302620457e-05, "loss": 109.5835, "step": 5836, "task_loss": 3.042492628097534 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9999885751346672, "compression/movement_sparsity/importance_threshold": -8.001672714003094e-08, "compression/movement_sparsity/linear_layer_sparsity": 0.9244455405139336, "compression/movement_sparsity/model_sparsity": 0.8926879922701539, "compression_loss": 105.53374481201172, "distillation_loss": 3.7849502563476562, "epoch": 4.93, "learning_rate": 2.8144078144078146e-05, "loss": 110.0813, "step": 5837, "task_loss": 1.8033572435379028 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9999889982340258, "compression/movement_sparsity/importance_threshold": -7.70534514310181e-08, "compression/movement_sparsity/linear_layer_sparsity": 0.9244629617228495, "compression/movement_sparsity/model_sparsity": 0.8927048150069498, "compression_loss": 105.53325653076172, "distillation_loss": 2.847432851791382, "epoch": 4.93, "learning_rate": 2.8139381985535833e-05, "loss": 108.9619, "step": 5838, "task_loss": 1.4793275594711304 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9999894107564582, "compression/movement_sparsity/importance_threshold": -7.41642537070708e-08, "compression/movement_sparsity/linear_layer_sparsity": 0.9245609068358105, "compression/movement_sparsity/model_sparsity": 0.8927993954039661, "compression_loss": 105.53274536132812, "distillation_loss": 3.3297038078308105, "epoch": 4.94, "learning_rate": 2.8134685826993522e-05, "loss": 109.4299, "step": 5839, "task_loss": 1.1347813606262207 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9999898128358496, "compression/movement_sparsity/importance_threshold": -7.134819627295508e-08, "compression/movement_sparsity/linear_layer_sparsity": 0.9245997915464711, "compression/movement_sparsity/model_sparsity": 0.8928369443051921, "compression_loss": 105.53221893310547, "distillation_loss": 4.980269908905029, "epoch": 4.94, "learning_rate": 2.8129989668451205e-05, "loss": 109.5584, "step": 5840, "task_loss": 2.2784786224365234 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9999902046060852, "compression/movement_sparsity/importance_threshold": -6.86043414325696e-08, "compression/movement_sparsity/linear_layer_sparsity": 0.9245908245724089, "compression/movement_sparsity/model_sparsity": 0.8928282853742746, "compression_loss": 105.5317153930664, "distillation_loss": 4.127524375915527, "epoch": 4.94, "learning_rate": 2.8125293509908895e-05, "loss": 108.7755, "step": 5841, "task_loss": 2.0062153339385986 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9999905862010502, "compression/movement_sparsity/importance_threshold": -6.593175148981306e-08, "compression/movement_sparsity/linear_layer_sparsity": 0.9246091520180653, "compression/movement_sparsity/model_sparsity": 0.8928459832157909, "compression_loss": 105.53118896484375, "distillation_loss": 3.4782378673553467, "epoch": 4.94, "learning_rate": 2.8120597351366585e-05, "loss": 110.2042, "step": 5842, "task_loss": 1.7551798820495605 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9999909577546297, "compression/movement_sparsity/importance_threshold": -6.332948874771677e-08, "compression/movement_sparsity/linear_layer_sparsity": 0.9246162349736409, "compression/movement_sparsity/model_sparsity": 0.8928528228500529, "compression_loss": 105.53069305419922, "distillation_loss": 3.982268810272217, "epoch": 4.94, "learning_rate": 2.8115901192824275e-05, "loss": 108.8798, "step": 5843, "task_loss": 2.2816355228424072 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9999913194007086, "compression/movement_sparsity/importance_threshold": -6.079661551191412e-08, "compression/movement_sparsity/linear_layer_sparsity": 0.924609748226447, "compression/movement_sparsity/model_sparsity": 0.8928465589425806, "compression_loss": 105.53014373779297, "distillation_loss": 5.787306785583496, "epoch": 4.94, "learning_rate": 2.8111205034281958e-05, "loss": 109.4694, "step": 5844, "task_loss": 4.669091701507568 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9999916712731723, "compression/movement_sparsity/importance_threshold": -5.833219408543644e-08, "compression/movement_sparsity/linear_layer_sparsity": 0.924622519009985, "compression/movement_sparsity/model_sparsity": 0.8928588910104165, "compression_loss": 105.5296401977539, "distillation_loss": 5.608484745025635, "epoch": 4.94, "learning_rate": 2.8106508875739644e-05, "loss": 109.8145, "step": 5845, "task_loss": 2.7302603721618652 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9999920135059059, "compression/movement_sparsity/importance_threshold": -5.5935286773049764e-08, "compression/movement_sparsity/linear_layer_sparsity": 0.9246560736177123, "compression/movement_sparsity/model_sparsity": 0.8928912929141424, "compression_loss": 105.52909851074219, "distillation_loss": 4.4703288078308105, "epoch": 4.94, "learning_rate": 2.8101812717197334e-05, "loss": 109.4073, "step": 5846, "task_loss": 2.8261890411376953 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9999923462327946, "compression/movement_sparsity/importance_threshold": -5.360495587691805e-08, "compression/movement_sparsity/linear_layer_sparsity": 0.9247058212450892, "compression/movement_sparsity/model_sparsity": 0.8929393315574772, "compression_loss": 105.52857971191406, "distillation_loss": 4.259686470031738, "epoch": 4.94, "learning_rate": 2.8097116558655023e-05, "loss": 109.172, "step": 5847, "task_loss": 1.9527904987335205 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9999926695877234, "compression/movement_sparsity/importance_threshold": -5.1340263703542055e-08, "compression/movement_sparsity/linear_layer_sparsity": 0.924717387687696, "compression/movement_sparsity/model_sparsity": 0.8929505006571979, "compression_loss": 105.5279769897461, "distillation_loss": 3.723161220550537, "epoch": 4.94, "learning_rate": 2.8092420400112706e-05, "loss": 109.0592, "step": 5848, "task_loss": 1.7695951461791992 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9999929837045775, "compression/movement_sparsity/importance_threshold": -4.914027255508574e-08, "compression/movement_sparsity/linear_layer_sparsity": 0.9246842385016683, "compression/movement_sparsity/model_sparsity": 0.8929184902476891, "compression_loss": 105.52749633789062, "distillation_loss": 3.7978339195251465, "epoch": 4.94, "learning_rate": 2.8087724241570396e-05, "loss": 109.3704, "step": 5849, "task_loss": 1.4150173664093018 }, { "compression/movement_sparsity/importance_regularization_factor": 0.999993288717242, "compression/movement_sparsity/importance_threshold": -4.700404473631514e-08, "compression/movement_sparsity/linear_layer_sparsity": 0.924745349860802, "compression/movement_sparsity/model_sparsity": 0.892977502243636, "compression_loss": 105.5269546508789, "distillation_loss": 4.235128402709961, "epoch": 4.94, "learning_rate": 2.8083028083028086e-05, "loss": 109.7843, "step": 5850, "task_loss": 2.5113155841827393 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9999935847596021, "compression/movement_sparsity/importance_threshold": -4.493064255112894e-08, "compression/movement_sparsity/linear_layer_sparsity": 0.9247028163548449, "compression/movement_sparsity/model_sparsity": 0.892936429894457, "compression_loss": 105.52645874023438, "distillation_loss": 3.549377679824829, "epoch": 4.95, "learning_rate": 2.8078331924485772e-05, "loss": 109.5383, "step": 5851, "task_loss": 2.607452869415283 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9999938719655429, "compression/movement_sparsity/importance_threshold": -4.291912830255845e-08, "compression/movement_sparsity/linear_layer_sparsity": 0.9247228489564732, "compression/movement_sparsity/model_sparsity": 0.8929557743145918, "compression_loss": 105.52588653564453, "distillation_loss": 4.412120819091797, "epoch": 4.95, "learning_rate": 2.8073635765943462e-05, "loss": 109.8058, "step": 5852, "task_loss": 1.9534404277801514 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9999941504689496, "compression/movement_sparsity/importance_threshold": -4.0968564296237076e-08, "compression/movement_sparsity/linear_layer_sparsity": 0.9247458029791722, "compression/movement_sparsity/model_sparsity": 0.8929779397959963, "compression_loss": 105.52539825439453, "distillation_loss": 2.6091818809509277, "epoch": 4.95, "learning_rate": 2.8068939607401145e-05, "loss": 108.4475, "step": 5853, "task_loss": 0.8372893333435059 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9999944204037072, "compression/movement_sparsity/importance_threshold": -3.907801283519613e-08, "compression/movement_sparsity/linear_layer_sparsity": 0.9247926053371429, "compression/movement_sparsity/model_sparsity": 0.8930231343489898, "compression_loss": 105.52485656738281, "distillation_loss": 5.425722122192383, "epoch": 4.95, "learning_rate": 2.8064243448858834e-05, "loss": 109.4549, "step": 5854, "task_loss": 2.500831127166748 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9999946819037011, "compression/movement_sparsity/importance_threshold": -3.724653622246693e-08, "compression/movement_sparsity/linear_layer_sparsity": 0.9247769369808695, "compression/movement_sparsity/model_sparsity": 0.8930080042489559, "compression_loss": 105.52434539794922, "distillation_loss": 3.830941915512085, "epoch": 4.95, "learning_rate": 2.8059547290316524e-05, "loss": 108.8586, "step": 5855, "task_loss": 1.867904782295227 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9999949351028162, "compression/movement_sparsity/importance_threshold": -3.547319676368288e-08, "compression/movement_sparsity/linear_layer_sparsity": 0.9247463514908835, "compression/movement_sparsity/model_sparsity": 0.8929784694646429, "compression_loss": 105.52375030517578, "distillation_loss": 3.827970027923584, "epoch": 4.95, "learning_rate": 2.8054851131774214e-05, "loss": 108.9452, "step": 5856, "task_loss": 2.050349235534668 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9999951801349377, "compression/movement_sparsity/importance_threshold": -3.3757056762742654e-08, "compression/movement_sparsity/linear_layer_sparsity": 0.924760314691185, "compression/movement_sparsity/model_sparsity": 0.8929919529860583, "compression_loss": 105.52322387695312, "distillation_loss": 2.530643939971924, "epoch": 4.95, "learning_rate": 2.8050154973231897e-05, "loss": 109.7302, "step": 5857, "task_loss": 1.3130748271942139 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9999954171339508, "compression/movement_sparsity/importance_threshold": -3.209717852267757e-08, "compression/movement_sparsity/linear_layer_sparsity": 0.9247861305141167, "compression/movement_sparsity/model_sparsity": 0.8930168819560534, "compression_loss": 105.52264404296875, "distillation_loss": 3.407151222229004, "epoch": 4.95, "learning_rate": 2.8045458814689583e-05, "loss": 108.642, "step": 5858, "task_loss": 1.5685487985610962 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9999956462337406, "compression/movement_sparsity/importance_threshold": -3.049262434738631e-08, "compression/movement_sparsity/linear_layer_sparsity": 0.9247675645851077, "compression/movement_sparsity/model_sparsity": 0.8929989538238213, "compression_loss": 105.52210235595703, "distillation_loss": 4.470716953277588, "epoch": 4.95, "learning_rate": 2.8040762656147273e-05, "loss": 109.5695, "step": 5859, "task_loss": 2.057111978530884 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9999958675681923, "compression/movement_sparsity/importance_threshold": -2.894245654163491e-08, "compression/movement_sparsity/linear_layer_sparsity": 0.924785462760729, "compression/movement_sparsity/model_sparsity": 0.8930162371420489, "compression_loss": 105.52153778076172, "distillation_loss": 5.06801176071167, "epoch": 4.95, "learning_rate": 2.8036066497604963e-05, "loss": 109.2933, "step": 5860, "task_loss": 2.635833501815796 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9999960812711909, "compression/movement_sparsity/importance_threshold": -2.7445737409322046e-08, "compression/movement_sparsity/linear_layer_sparsity": 0.9247773900992396, "compression/movement_sparsity/model_sparsity": 0.893008441801316, "compression_loss": 105.52100372314453, "distillation_loss": 4.725068092346191, "epoch": 4.95, "learning_rate": 2.8031370339062646e-05, "loss": 109.5713, "step": 5861, "task_loss": 3.1156582832336426 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9999962874766216, "compression/movement_sparsity/importance_threshold": -2.600152925347904e-08, "compression/movement_sparsity/linear_layer_sparsity": 0.9247800849611253, "compression/movement_sparsity/model_sparsity": 0.8930110440864056, "compression_loss": 105.52043151855469, "distillation_loss": 3.3206117153167725, "epoch": 4.95, "learning_rate": 2.8026674180520335e-05, "loss": 109.3429, "step": 5862, "task_loss": 1.3587496280670166 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9999964863183696, "compression/movement_sparsity/importance_threshold": -2.4608894379739288e-08, "compression/movement_sparsity/linear_layer_sparsity": 0.9248166444590968, "compression/movement_sparsity/model_sparsity": 0.8930463476531516, "compression_loss": 105.51992797851562, "distillation_loss": 2.6819381713867188, "epoch": 4.96, "learning_rate": 2.8021978021978025e-05, "loss": 109.293, "step": 5863, "task_loss": 2.0624547004699707 }, { "compression/movement_sparsity/importance_regularization_factor": 0.99999667793032, "compression/movement_sparsity/importance_threshold": -2.326689509113411e-08, "compression/movement_sparsity/linear_layer_sparsity": 0.9248115170670135, "compression/movement_sparsity/model_sparsity": 0.89304139640276, "compression_loss": 105.51939392089844, "distillation_loss": 4.075498580932617, "epoch": 4.96, "learning_rate": 2.801728186343571e-05, "loss": 109.0327, "step": 5864, "task_loss": 1.5036287307739258 }, { "compression/movement_sparsity/importance_regularization_factor": 0.999996862446358, "compression/movement_sparsity/importance_threshold": -2.197459369069482e-08, "compression/movement_sparsity/linear_layer_sparsity": 0.9247899939444307, "compression/movement_sparsity/model_sparsity": 0.8930206126656508, "compression_loss": 105.51885986328125, "distillation_loss": 3.6778221130371094, "epoch": 4.96, "learning_rate": 2.8012585704893394e-05, "loss": 109.2772, "step": 5865, "task_loss": 3.221446990966797 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9999970400003686, "compression/movement_sparsity/importance_threshold": -2.0731052484054824e-08, "compression/movement_sparsity/linear_layer_sparsity": 0.9247336522523513, "compression/movement_sparsity/model_sparsity": 0.8929662064840217, "compression_loss": 105.5183334350586, "distillation_loss": 4.524487495422363, "epoch": 4.96, "learning_rate": 2.8007889546351084e-05, "loss": 110.2112, "step": 5866, "task_loss": 1.9631383419036865 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9999972107262372, "compression/movement_sparsity/importance_threshold": -1.9535333774245434e-08, "compression/movement_sparsity/linear_layer_sparsity": 0.9247798822502755, "compression/movement_sparsity/model_sparsity": 0.8930108483392971, "compression_loss": 105.51778411865234, "distillation_loss": 3.589909076690674, "epoch": 4.96, "learning_rate": 2.8003193387808774e-05, "loss": 109.4291, "step": 5867, "task_loss": 2.675635814666748 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9999973747578486, "compression/movement_sparsity/importance_threshold": -1.838649986603269e-08, "compression/movement_sparsity/linear_layer_sparsity": 0.9248069739591441, "compression/movement_sparsity/model_sparsity": 0.8930370093646223, "compression_loss": 105.51721954345703, "distillation_loss": 4.056374549865723, "epoch": 4.96, "learning_rate": 2.7998497229266464e-05, "loss": 110.3129, "step": 5868, "task_loss": 2.75839900970459 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9999975322290882, "compression/movement_sparsity/importance_threshold": -1.728361306244791e-08, "compression/movement_sparsity/linear_layer_sparsity": 0.9248404093251952, "compression/movement_sparsity/model_sparsity": 0.8930692961229901, "compression_loss": 105.5167236328125, "distillation_loss": 4.368237495422363, "epoch": 4.96, "learning_rate": 2.7993801070724153e-05, "loss": 109.1265, "step": 5869, "task_loss": 2.9064948558807373 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9999976832738409, "compression/movement_sparsity/importance_threshold": -1.6225735668257135e-08, "compression/movement_sparsity/linear_layer_sparsity": 0.9249473214122182, "compression/movement_sparsity/model_sparsity": 0.893172535450924, "compression_loss": 105.51616668701172, "distillation_loss": 4.509244918823242, "epoch": 4.96, "learning_rate": 2.7989104912181836e-05, "loss": 109.1984, "step": 5870, "task_loss": 2.7013630867004395 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9999978280259921, "compression/movement_sparsity/importance_threshold": -1.521192998649168e-08, "compression/movement_sparsity/linear_layer_sparsity": 0.9249166286047236, "compression/movement_sparsity/model_sparsity": 0.8931428970357889, "compression_loss": 105.51564025878906, "distillation_loss": 3.6194417476654053, "epoch": 4.96, "learning_rate": 2.7984408753639523e-05, "loss": 108.8022, "step": 5871, "task_loss": 1.9047770500183105 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9999979666194269, "compression/movement_sparsity/importance_threshold": -1.4241258321917583e-08, "compression/movement_sparsity/linear_layer_sparsity": 0.9249165093630473, "compression/movement_sparsity/model_sparsity": 0.893142781890431, "compression_loss": 105.51509857177734, "distillation_loss": 4.460231781005859, "epoch": 4.96, "learning_rate": 2.7979712595097212e-05, "loss": 109.9072, "step": 5872, "task_loss": 2.05206561088562 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9999980991880303, "compression/movement_sparsity/importance_threshold": -1.3312782977566162e-08, "compression/movement_sparsity/linear_layer_sparsity": 0.924910261099206, "compression/movement_sparsity/model_sparsity": 0.8931367482736746, "compression_loss": 105.51454162597656, "distillation_loss": 3.354055643081665, "epoch": 4.96, "learning_rate": 2.7975016436554902e-05, "loss": 108.8324, "step": 5873, "task_loss": 1.7151175737380981 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9999982258656874, "compression/movement_sparsity/importance_threshold": -1.2425566259938181e-08, "compression/movement_sparsity/linear_layer_sparsity": 0.9249130752027681, "compression/movement_sparsity/model_sparsity": 0.8931394657041221, "compression_loss": 105.513916015625, "distillation_loss": 2.9095869064331055, "epoch": 4.96, "learning_rate": 2.7970320278012585e-05, "loss": 108.7118, "step": 5874, "task_loss": 1.9037086963653564 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9999983467862836, "compression/movement_sparsity/importance_threshold": -1.157867046946287e-08, "compression/movement_sparsity/linear_layer_sparsity": 0.9249651003461633, "compression/movement_sparsity/model_sparsity": 0.8931897036237937, "compression_loss": 105.51336669921875, "distillation_loss": 4.811957836151123, "epoch": 4.97, "learning_rate": 2.7965624119470275e-05, "loss": 109.5832, "step": 5875, "task_loss": 2.759206771850586 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9999984620837039, "compression/movement_sparsity/importance_threshold": -1.0771157912640994e-08, "compression/movement_sparsity/linear_layer_sparsity": 0.9249788846839504, "compression/movement_sparsity/model_sparsity": 0.8932030144271722, "compression_loss": 105.51270294189453, "distillation_loss": 3.9048945903778076, "epoch": 4.97, "learning_rate": 2.7960927960927964e-05, "loss": 109.1663, "step": 5876, "task_loss": 1.7396193742752075 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9999985718918334, "compression/movement_sparsity/importance_threshold": -1.0002090892503868e-08, "compression/movement_sparsity/linear_layer_sparsity": 0.925012463140013, "compression/movement_sparsity/model_sparsity": 0.8932354393599696, "compression_loss": 105.51212310791016, "distillation_loss": 3.7623343467712402, "epoch": 4.97, "learning_rate": 2.795623180238565e-05, "loss": 109.2077, "step": 5877, "task_loss": 1.5687006711959839 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9999986763445573, "compression/movement_sparsity/importance_threshold": -9.27053171295017e-09, "compression/movement_sparsity/linear_layer_sparsity": 0.9249954831252996, "compression/movement_sparsity/model_sparsity": 0.8932190426609982, "compression_loss": 105.51154327392578, "distillation_loss": 4.212191104888916, "epoch": 4.97, "learning_rate": 2.7951535643843334e-05, "loss": 109.4948, "step": 5878, "task_loss": 1.865241289138794 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9999987755757607, "compression/movement_sparsity/importance_threshold": -8.57554267874594e-09, "compression/movement_sparsity/linear_layer_sparsity": 0.924959054793172, "compression/movement_sparsity/model_sparsity": 0.8931838657541459, "compression_loss": 105.51097106933594, "distillation_loss": 4.179649353027344, "epoch": 4.97, "learning_rate": 2.7946839485301023e-05, "loss": 109.6506, "step": 5879, "task_loss": 3.6693124771118164 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9999988697193287, "compression/movement_sparsity/importance_threshold": -7.916186092922495e-09, "compression/movement_sparsity/linear_layer_sparsity": 0.9249164974388796, "compression/movement_sparsity/model_sparsity": 0.8931427703758952, "compression_loss": 105.51036834716797, "distillation_loss": 3.6689977645874023, "epoch": 4.97, "learning_rate": 2.7942143326758713e-05, "loss": 109.9064, "step": 5880, "task_loss": 1.5843734741210938 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9999989589091466, "compression/movement_sparsity/importance_threshold": -7.291524260245874e-09, "compression/movement_sparsity/linear_layer_sparsity": 0.9249577192863968, "compression/movement_sparsity/model_sparsity": 0.8931825761261368, "compression_loss": 105.50984954833984, "distillation_loss": 3.979867458343506, "epoch": 4.97, "learning_rate": 2.7937447168216403e-05, "loss": 109.0381, "step": 5881, "task_loss": 2.6386313438415527 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9999990432790993, "compression/movement_sparsity/importance_threshold": -6.700619484614756e-09, "compression/movement_sparsity/linear_layer_sparsity": 0.9249835589576637, "compression/movement_sparsity/model_sparsity": 0.8932075281252037, "compression_loss": 105.50929260253906, "distillation_loss": 3.2594900131225586, "epoch": 4.97, "learning_rate": 2.7932751009674086e-05, "loss": 109.1922, "step": 5882, "task_loss": 1.8288002014160156 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9999991229630721, "compression/movement_sparsity/importance_threshold": -6.1425340699278186e-09, "compression/movement_sparsity/linear_layer_sparsity": 0.924989211013123, "compression/movement_sparsity/model_sparsity": 0.8932129860151702, "compression_loss": 105.50875854492188, "distillation_loss": 4.670947551727295, "epoch": 4.97, "learning_rate": 2.7928054851131776e-05, "loss": 109.7989, "step": 5883, "task_loss": 3.5299787521362305 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9999991980949503, "compression/movement_sparsity/importance_threshold": -5.6163303183490165e-09, "compression/movement_sparsity/linear_layer_sparsity": 0.9249779426747072, "compression/movement_sparsity/model_sparsity": 0.8932021047788444, "compression_loss": 105.50823211669922, "distillation_loss": 3.950338840484619, "epoch": 4.97, "learning_rate": 2.7923358692589462e-05, "loss": 109.2451, "step": 5884, "task_loss": 2.941614866256714 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9999992688086187, "compression/movement_sparsity/importance_threshold": -5.121070537246475e-09, "compression/movement_sparsity/linear_layer_sparsity": 0.9249505886341505, "compression/movement_sparsity/model_sparsity": 0.8931756904337317, "compression_loss": 105.50763702392578, "distillation_loss": 4.852612018585205, "epoch": 4.97, "learning_rate": 2.791866253404715e-05, "loss": 109.0179, "step": 5885, "task_loss": 1.847052812576294 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9999993352379626, "compression/movement_sparsity/importance_threshold": -4.6558170279167865e-09, "compression/movement_sparsity/linear_layer_sparsity": 0.9249317722976211, "compression/movement_sparsity/model_sparsity": 0.893157520496248, "compression_loss": 105.50706481933594, "distillation_loss": 5.089334487915039, "epoch": 4.97, "learning_rate": 2.791396637550484e-05, "loss": 109.5355, "step": 5886, "task_loss": 2.56630277633667 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9999993975168672, "compression/movement_sparsity/importance_threshold": -4.219632095993353e-09, "compression/movement_sparsity/linear_layer_sparsity": 0.9249224595226975, "compression/movement_sparsity/model_sparsity": 0.8931485276437924, "compression_loss": 105.50646209716797, "distillation_loss": 3.3884177207946777, "epoch": 4.98, "learning_rate": 2.7909270216962524e-05, "loss": 108.9685, "step": 5887, "task_loss": 2.3193747997283936 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9999994557792176, "compression/movement_sparsity/importance_threshold": -3.811578043640129e-09, "compression/movement_sparsity/linear_layer_sparsity": 0.9249176898556432, "compression/movement_sparsity/model_sparsity": 0.8931439218294746, "compression_loss": 105.50586700439453, "distillation_loss": 4.13754940032959, "epoch": 4.98, "learning_rate": 2.7904574058420214e-05, "loss": 109.9003, "step": 5888, "task_loss": 2.5789523124694824 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9999995101588989, "compression/movement_sparsity/importance_threshold": -3.4307171756231547e-09, "compression/movement_sparsity/linear_layer_sparsity": 0.9249341213586454, "compression/movement_sparsity/model_sparsity": 0.8931597888597994, "compression_loss": 105.50529479980469, "distillation_loss": 4.677050590515137, "epoch": 4.98, "learning_rate": 2.78998778998779e-05, "loss": 109.0332, "step": 5889, "task_loss": 2.284461736679077 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9999995607897962, "compression/movement_sparsity/importance_threshold": -3.076111797575831e-09, "compression/movement_sparsity/linear_layer_sparsity": 0.9249663285354298, "compression/movement_sparsity/model_sparsity": 0.8931908896209805, "compression_loss": 105.50463104248047, "distillation_loss": 3.4984352588653564, "epoch": 4.98, "learning_rate": 2.789518174133559e-05, "loss": 109.745, "step": 5890, "task_loss": 2.1903717517852783 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9999996078057948, "compression/movement_sparsity/importance_threshold": -2.7468242107947516e-09, "compression/movement_sparsity/linear_layer_sparsity": 0.925006799160386, "compression/movement_sparsity/model_sparsity": 0.8932299699554672, "compression_loss": 105.5040283203125, "distillation_loss": 1.9705235958099365, "epoch": 4.98, "learning_rate": 2.7890485582793273e-05, "loss": 108.7467, "step": 5891, "task_loss": 0.6571514010429382 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9999996513407796, "compression/movement_sparsity/importance_threshold": -2.441916721780679e-09, "compression/movement_sparsity/linear_layer_sparsity": 0.9250161119353095, "compression/movement_sparsity/model_sparsity": 0.8932389628079227, "compression_loss": 105.50341796875, "distillation_loss": 3.0539584159851074, "epoch": 4.98, "learning_rate": 2.7885789424250963e-05, "loss": 109.6007, "step": 5892, "task_loss": 1.979411005973816 }, { "compression/movement_sparsity/importance_regularization_factor": 0.999999691528636, "compression/movement_sparsity/importance_threshold": -2.1604516326975687e-09, "compression/movement_sparsity/linear_layer_sparsity": 0.9250263428711412, "compression/movement_sparsity/model_sparsity": 0.8932488422796344, "compression_loss": 105.50274658203125, "distillation_loss": 2.814729690551758, "epoch": 4.98, "learning_rate": 2.7881093265708653e-05, "loss": 109.698, "step": 5893, "task_loss": 1.5163878202438354 }, { "compression/movement_sparsity/importance_regularization_factor": 0.999999728503249, "compression/movement_sparsity/importance_threshold": -1.90149124831146e-09, "compression/movement_sparsity/linear_layer_sparsity": 0.9249927763392461, "compression/movement_sparsity/model_sparsity": 0.8932164288613728, "compression_loss": 105.50222778320312, "distillation_loss": 2.719080924987793, "epoch": 4.98, "learning_rate": 2.7876397107166342e-05, "loss": 108.7807, "step": 5894, "task_loss": 1.2525080442428589 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9999997623985037, "compression/movement_sparsity/importance_threshold": -1.664097872521031e-09, "compression/movement_sparsity/linear_layer_sparsity": 0.9250131905142388, "compression/movement_sparsity/model_sparsity": 0.8932361417466531, "compression_loss": 105.5015640258789, "distillation_loss": 3.8779735565185547, "epoch": 4.98, "learning_rate": 2.7871700948624025e-05, "loss": 108.9899, "step": 5895, "task_loss": 2.3458926677703857 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9999997933482855, "compression/movement_sparsity/importance_threshold": -1.4473338083575982e-09, "compression/movement_sparsity/linear_layer_sparsity": 0.9250223721233184, "compression/movement_sparsity/model_sparsity": 0.8932450079392149, "compression_loss": 105.5009765625, "distillation_loss": 5.491689682006836, "epoch": 4.98, "learning_rate": 2.7867004790081715e-05, "loss": 109.5036, "step": 5896, "task_loss": 2.5188772678375244 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9999998214864791, "compression/movement_sparsity/importance_threshold": -1.2502613614545632e-09, "compression/movement_sparsity/linear_layer_sparsity": 0.9250948472142092, "compression/movement_sparsity/model_sparsity": 0.8933149932877741, "compression_loss": 105.50039672851562, "distillation_loss": 4.8426384925842285, "epoch": 4.98, "learning_rate": 2.78623086315394e-05, "loss": 109.679, "step": 5897, "task_loss": 2.037792921066284 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9999998469469701, "compression/movement_sparsity/importance_threshold": -1.0719428348432425e-09, "compression/movement_sparsity/linear_layer_sparsity": 0.9250792623271091, "compression/movement_sparsity/model_sparsity": 0.8932999437894906, "compression_loss": 105.499755859375, "distillation_loss": 2.8470168113708496, "epoch": 4.99, "learning_rate": 2.785761247299709e-05, "loss": 109.256, "step": 5898, "task_loss": 1.5510737895965576 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9999998698636433, "compression/movement_sparsity/importance_threshold": -9.11440532422314e-10, "compression/movement_sparsity/linear_layer_sparsity": 0.9250863214343495, "compression/movement_sparsity/model_sparsity": 0.8933067603946809, "compression_loss": 105.4991226196289, "distillation_loss": 3.4297666549682617, "epoch": 4.99, "learning_rate": 2.785291631445478e-05, "loss": 109.2113, "step": 5899, "task_loss": 3.621725082397461 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9999998903703841, "compression/movement_sparsity/importance_threshold": -7.67816758090456e-10, "compression/movement_sparsity/linear_layer_sparsity": 0.9251470035234485, "compression/movement_sparsity/model_sparsity": 0.8933653578673394, "compression_loss": 105.49853515625, "distillation_loss": 4.156139373779297, "epoch": 4.99, "learning_rate": 2.7848220155912464e-05, "loss": 109.5652, "step": 5900, "task_loss": 3.342054843902588 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9999999086010773, "compression/movement_sparsity/importance_threshold": -6.4013381748107e-10, "compression/movement_sparsity/linear_layer_sparsity": 0.9251133773707153, "compression/movement_sparsity/model_sparsity": 0.8933328868763988, "compression_loss": 105.49789428710938, "distillation_loss": 2.710592746734619, "epoch": 4.99, "learning_rate": 2.7843523997370153e-05, "loss": 108.9363, "step": 5901, "task_loss": 1.3197705745697021 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9999999246896084, "compression/movement_sparsity/importance_threshold": -5.274540118907489e-10, "compression/movement_sparsity/linear_layer_sparsity": 0.9250882293011713, "compression/movement_sparsity/model_sparsity": 0.8933086027204081, "compression_loss": 105.49723052978516, "distillation_loss": 4.235291481018066, "epoch": 4.99, "learning_rate": 2.783882783882784e-05, "loss": 109.3602, "step": 5902, "task_loss": 2.227602243423462 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9999999387698624, "compression/movement_sparsity/importance_threshold": -4.2883964695289434e-10, "compression/movement_sparsity/linear_layer_sparsity": 0.92510654482266, "compression/movement_sparsity/model_sparsity": 0.8933262890473885, "compression_loss": 105.49665069580078, "distillation_loss": 4.263698577880859, "epoch": 4.99, "learning_rate": 2.783413168028553e-05, "loss": 109.1425, "step": 5903, "task_loss": 2.08604097366333 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9999999509757244, "compression/movement_sparsity/importance_threshold": -3.433530265661844e-10, "compression/movement_sparsity/linear_layer_sparsity": 0.9251375715068485, "compression/movement_sparsity/model_sparsity": 0.8933562498695259, "compression_loss": 105.49600219726562, "distillation_loss": 5.044488906860352, "epoch": 4.99, "learning_rate": 2.7829435521743212e-05, "loss": 108.9655, "step": 5904, "task_loss": 3.1151111125946045 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9999999614410795, "compression/movement_sparsity/importance_threshold": -2.7005645376193543e-10, "compression/movement_sparsity/linear_layer_sparsity": 0.92512212970976, "compression/movement_sparsity/model_sparsity": 0.893341338545672, "compression_loss": 105.495361328125, "distillation_loss": 3.8421998023986816, "epoch": 4.99, "learning_rate": 2.7824739363200902e-05, "loss": 109.0103, "step": 5905, "task_loss": 2.1294994354248047 }, { "compression/movement_sparsity/importance_regularization_factor": 0.999999970299813, "compression/movement_sparsity/importance_threshold": -2.0801223330618734e-10, "compression/movement_sparsity/linear_layer_sparsity": 0.9251399324920404, "compression/movement_sparsity/model_sparsity": 0.8933585297476132, "compression_loss": 105.4947738647461, "distillation_loss": 3.687211036682129, "epoch": 4.99, "learning_rate": 2.7820043204658592e-05, "loss": 109.3556, "step": 5906, "task_loss": 1.873538613319397 }, { "compression/movement_sparsity/importance_regularization_factor": 0.99999997768581, "compression/movement_sparsity/importance_threshold": -1.5628266996497997e-10, "compression/movement_sparsity/linear_layer_sparsity": 0.9251245264674548, "compression/movement_sparsity/model_sparsity": 0.8933436529673667, "compression_loss": 105.4941177368164, "distillation_loss": 5.711050510406494, "epoch": 4.99, "learning_rate": 2.781534704611628e-05, "loss": 110.006, "step": 5907, "task_loss": 3.399139642715454 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9999999837329554, "compression/movement_sparsity/importance_threshold": -1.1393006676962969e-10, "compression/movement_sparsity/linear_layer_sparsity": 0.9252315577961544, "compression/movement_sparsity/model_sparsity": 0.8934470074406585, "compression_loss": 105.49349975585938, "distillation_loss": 5.6282243728637695, "epoch": 4.99, "learning_rate": 2.7810650887573965e-05, "loss": 110.0112, "step": 5908, "task_loss": 3.2896342277526855 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9999999885751346, "compression/movement_sparsity/importance_threshold": -8.001672761881462e-11, "compression/movement_sparsity/linear_layer_sparsity": 0.925334952253725, "compression/movement_sparsity/model_sparsity": 0.8935468499805329, "compression_loss": 105.492919921875, "distillation_loss": 4.258896827697754, "epoch": 4.99, "learning_rate": 2.780595472903165e-05, "loss": 109.4165, "step": 5909, "task_loss": 2.5525002479553223 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9999999923462328, "compression/movement_sparsity/importance_threshold": -5.360495554385114e-11, "compression/movement_sparsity/linear_layer_sparsity": 0.9253391495607327, "compression/movement_sparsity/model_sparsity": 0.8935509030971326, "compression_loss": 105.4923095703125, "distillation_loss": 3.679643154144287, "epoch": 5.0, "learning_rate": 2.780125857048934e-05, "loss": 109.1786, "step": 5910, "task_loss": 1.9977991580963135 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9999999951801349, "compression/movement_sparsity/importance_threshold": -3.375705704550258e-11, "compression/movement_sparsity/linear_layer_sparsity": 0.9253683399231053, "compression/movement_sparsity/model_sparsity": 0.8935790906807577, "compression_loss": 105.49173736572266, "distillation_loss": 3.327390670776367, "epoch": 5.0, "learning_rate": 2.779656241194703e-05, "loss": 109.316, "step": 5911, "task_loss": 2.482755661010742 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9999999972107262, "compression/movement_sparsity/importance_threshold": -1.9535334287723582e-11, "compression/movement_sparsity/linear_layer_sparsity": 0.9253979237830099, "compression/movement_sparsity/model_sparsity": 0.893607658244064, "compression_loss": 105.49112701416016, "distillation_loss": 5.1300048828125, "epoch": 5.0, "learning_rate": 2.7791866253404713e-05, "loss": 110.1546, "step": 5912, "task_loss": 2.7778573036193848 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9999999985718918, "compression/movement_sparsity/importance_threshold": -1.0002091169192262e-11, "compression/movement_sparsity/linear_layer_sparsity": 0.9253641783886004, "compression/movement_sparsity/model_sparsity": 0.8935750721077653, "compression_loss": 105.49063110351562, "distillation_loss": 4.768748760223389, "epoch": 5.0, "learning_rate": 2.7787170094862403e-05, "loss": 109.218, "step": 5913, "task_loss": 2.8526062965393066 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9999999993975168, "compression/movement_sparsity/importance_threshold": -4.219632455948474e-12, "compression/movement_sparsity/linear_layer_sparsity": 0.9253416774842715, "compression/movement_sparsity/model_sparsity": 0.8935533441787211, "compression_loss": 105.49003601074219, "distillation_loss": 4.018354415893555, "epoch": 5.0, "learning_rate": 2.7782473936320093e-05, "loss": 109.3875, "step": 5914, "task_loss": 2.262068271636963 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9999999998214865, "compression/movement_sparsity/importance_threshold": -1.2502611793085983e-12, "compression/movement_sparsity/linear_layer_sparsity": 0.9254309775756965, "compression/movement_sparsity/model_sparsity": 0.8936395765372864, "compression_loss": 105.48946380615234, "distillation_loss": 3.139014720916748, "epoch": 5.0, "learning_rate": 2.777777777777778e-05, "loss": 108.7148, "step": 5915, "task_loss": 1.5968846082687378 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 9.107519149780273, "epoch": 5.0, "learning_rate": 2.777308161923547e-05, "loss": 89.7272, "step": 5916, "task_loss": 3.68790864944458 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 5.998353004455566, "epoch": 5.0, "learning_rate": 2.7768385460693152e-05, "loss": 7.0484, "step": 5917, "task_loss": 3.254852771759033 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 5.807075500488281, "epoch": 5.0, "learning_rate": 2.776368930215084e-05, "loss": 5.5135, "step": 5918, "task_loss": 3.0269157886505127 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 4.923386096954346, "epoch": 5.0, "learning_rate": 2.775899314360853e-05, "loss": 5.2188, "step": 5919, "task_loss": 1.4955283403396606 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 4.83468770980835, "epoch": 5.0, "learning_rate": 2.775429698506622e-05, "loss": 4.3778, "step": 5920, "task_loss": 2.905348062515259 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 4.057333469390869, "epoch": 5.01, "learning_rate": 2.7749600826523904e-05, "loss": 4.1993, "step": 5921, "task_loss": 1.8149784803390503 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 4.103086948394775, "epoch": 5.01, "learning_rate": 2.774490466798159e-05, "loss": 3.5052, "step": 5922, "task_loss": 2.137714385986328 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 2.798582077026367, "epoch": 5.01, "learning_rate": 2.774020850943928e-05, "loss": 3.6085, "step": 5923, "task_loss": 1.8862818479537964 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 3.7657337188720703, "epoch": 5.01, "learning_rate": 2.773551235089697e-05, "loss": 3.7961, "step": 5924, "task_loss": 2.2626469135284424 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 3.1489675045013428, "epoch": 5.01, "learning_rate": 2.7730816192354653e-05, "loss": 2.9824, "step": 5925, "task_loss": 1.7105740308761597 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.6984586715698242, "epoch": 5.01, "learning_rate": 2.7726120033812342e-05, "loss": 2.7759, "step": 5926, "task_loss": 1.1583797931671143 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 2.3696179389953613, "epoch": 5.01, "learning_rate": 2.7721423875270032e-05, "loss": 2.3709, "step": 5927, "task_loss": 2.0790762901306152 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 2.175184726715088, "epoch": 5.01, "learning_rate": 2.771672771672772e-05, "loss": 2.3906, "step": 5928, "task_loss": 1.166720986366272 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 3.792428970336914, "epoch": 5.01, "learning_rate": 2.7712031558185408e-05, "loss": 2.5864, "step": 5929, "task_loss": 2.224179267883301 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 2.52396821975708, "epoch": 5.01, "learning_rate": 2.770733539964309e-05, "loss": 2.4367, "step": 5930, "task_loss": 1.4490300416946411 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 2.095733642578125, "epoch": 5.01, "learning_rate": 2.770263924110078e-05, "loss": 2.6712, "step": 5931, "task_loss": 2.071390151977539 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 2.496537685394287, "epoch": 5.01, "learning_rate": 2.769794308255847e-05, "loss": 2.2837, "step": 5932, "task_loss": 0.7397240400314331 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 2.4572761058807373, "epoch": 5.02, "learning_rate": 2.7693246924016157e-05, "loss": 2.173, "step": 5933, "task_loss": 1.1359903812408447 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 2.441149950027466, "epoch": 5.02, "learning_rate": 2.7688550765473843e-05, "loss": 2.6266, "step": 5934, "task_loss": 2.4219510555267334 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 2.9012343883514404, "epoch": 5.02, "learning_rate": 2.768385460693153e-05, "loss": 2.2335, "step": 5935, "task_loss": 1.1415892839431763 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.8224756717681885, "epoch": 5.02, "learning_rate": 2.767915844838922e-05, "loss": 2.1349, "step": 5936, "task_loss": 0.7082262635231018 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.8892982006072998, "epoch": 5.02, "learning_rate": 2.767446228984691e-05, "loss": 1.9177, "step": 5937, "task_loss": 1.2918776273727417 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 2.3299925327301025, "epoch": 5.02, "learning_rate": 2.7669766131304592e-05, "loss": 2.2113, "step": 5938, "task_loss": 1.7329392433166504 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.7745351791381836, "epoch": 5.02, "learning_rate": 2.7665069972762282e-05, "loss": 1.8083, "step": 5939, "task_loss": 2.4126667976379395 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 2.226346254348755, "epoch": 5.02, "learning_rate": 2.7660373814219968e-05, "loss": 2.322, "step": 5940, "task_loss": 1.2096538543701172 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 2.5984599590301514, "epoch": 5.02, "learning_rate": 2.7655677655677658e-05, "loss": 2.3425, "step": 5941, "task_loss": 1.8854190111160278 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 2.4422881603240967, "epoch": 5.02, "learning_rate": 2.765098149713534e-05, "loss": 2.066, "step": 5942, "task_loss": 2.004627227783203 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 2.118638038635254, "epoch": 5.02, "learning_rate": 2.764628533859303e-05, "loss": 2.2797, "step": 5943, "task_loss": 1.4140465259552002 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.347839117050171, "epoch": 5.02, "learning_rate": 2.764158918005072e-05, "loss": 2.062, "step": 5944, "task_loss": 0.9017975330352783 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 2.8050596714019775, "epoch": 5.03, "learning_rate": 2.763689302150841e-05, "loss": 2.4213, "step": 5945, "task_loss": 1.5031167268753052 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.4305057525634766, "epoch": 5.03, "learning_rate": 2.7632196862966096e-05, "loss": 1.3526, "step": 5946, "task_loss": 0.7485719323158264 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.689972162246704, "epoch": 5.03, "learning_rate": 2.7627500704423783e-05, "loss": 2.103, "step": 5947, "task_loss": 0.5695490837097168 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 2.046323776245117, "epoch": 5.03, "learning_rate": 2.762280454588147e-05, "loss": 1.4623, "step": 5948, "task_loss": 2.094740390777588 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.362403154373169, "epoch": 5.03, "learning_rate": 2.761810838733916e-05, "loss": 1.8346, "step": 5949, "task_loss": 1.1634949445724487 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 2.498169422149658, "epoch": 5.03, "learning_rate": 2.761341222879685e-05, "loss": 1.8253, "step": 5950, "task_loss": 1.6829357147216797 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.9084558486938477, "epoch": 5.03, "learning_rate": 2.760871607025453e-05, "loss": 2.0019, "step": 5951, "task_loss": 1.7362726926803589 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 2.0300230979919434, "epoch": 5.03, "learning_rate": 2.760401991171222e-05, "loss": 1.6678, "step": 5952, "task_loss": 1.5908313989639282 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.9563615322113037, "epoch": 5.03, "learning_rate": 2.7599323753169907e-05, "loss": 1.8268, "step": 5953, "task_loss": 1.6966561079025269 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 2.2024641036987305, "epoch": 5.03, "learning_rate": 2.7594627594627597e-05, "loss": 1.7741, "step": 5954, "task_loss": 1.0555740594863892 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.9592430591583252, "epoch": 5.03, "learning_rate": 2.758993143608528e-05, "loss": 1.8663, "step": 5955, "task_loss": 1.6705167293548584 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 2.0182042121887207, "epoch": 5.03, "learning_rate": 2.758523527754297e-05, "loss": 1.8086, "step": 5956, "task_loss": 1.6772360801696777 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.3420097827911377, "epoch": 5.04, "learning_rate": 2.758053911900066e-05, "loss": 1.8859, "step": 5957, "task_loss": 0.886572003364563 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.7672629356384277, "epoch": 5.04, "learning_rate": 2.757584296045835e-05, "loss": 1.792, "step": 5958, "task_loss": 1.9365952014923096 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.8932313919067383, "epoch": 5.04, "learning_rate": 2.7571146801916032e-05, "loss": 2.0352, "step": 5959, "task_loss": 2.4446828365325928 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.2710652351379395, "epoch": 5.04, "learning_rate": 2.756645064337372e-05, "loss": 1.574, "step": 5960, "task_loss": 0.9964640140533447 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.934610366821289, "epoch": 5.04, "learning_rate": 2.756175448483141e-05, "loss": 1.9456, "step": 5961, "task_loss": 0.6763312816619873 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.792356252670288, "epoch": 5.04, "learning_rate": 2.7557058326289098e-05, "loss": 1.7807, "step": 5962, "task_loss": 0.8963178396224976 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.4659570455551147, "epoch": 5.04, "learning_rate": 2.7552362167746788e-05, "loss": 1.8935, "step": 5963, "task_loss": 1.4169306755065918 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.4013087749481201, "epoch": 5.04, "learning_rate": 2.754766600920447e-05, "loss": 1.3332, "step": 5964, "task_loss": 0.8059857487678528 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.3107569217681885, "epoch": 5.04, "learning_rate": 2.754296985066216e-05, "loss": 1.5869, "step": 5965, "task_loss": 1.4338014125823975 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 2.1250243186950684, "epoch": 5.04, "learning_rate": 2.7538273692119847e-05, "loss": 2.1255, "step": 5966, "task_loss": 1.3717212677001953 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.465025544166565, "epoch": 5.04, "learning_rate": 2.7533577533577537e-05, "loss": 1.7426, "step": 5967, "task_loss": 1.0294339656829834 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 2.2154417037963867, "epoch": 5.04, "learning_rate": 2.752888137503522e-05, "loss": 1.9514, "step": 5968, "task_loss": 1.8215508460998535 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 2.6885862350463867, "epoch": 5.05, "learning_rate": 2.752418521649291e-05, "loss": 1.6836, "step": 5969, "task_loss": 2.9758992195129395 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.6610649824142456, "epoch": 5.05, "learning_rate": 2.75194890579506e-05, "loss": 1.6842, "step": 5970, "task_loss": 1.486685037612915 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.0938032865524292, "epoch": 5.05, "learning_rate": 2.751479289940829e-05, "loss": 1.3269, "step": 5971, "task_loss": 0.9728466272354126 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.8419153690338135, "epoch": 5.05, "learning_rate": 2.751009674086597e-05, "loss": 1.7515, "step": 5972, "task_loss": 1.336808204650879 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.1511118412017822, "epoch": 5.05, "learning_rate": 2.7505400582323658e-05, "loss": 1.2678, "step": 5973, "task_loss": 0.34537604451179504 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.6735496520996094, "epoch": 5.05, "learning_rate": 2.7500704423781348e-05, "loss": 2.2292, "step": 5974, "task_loss": 1.4412600994110107 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 2.815626621246338, "epoch": 5.05, "learning_rate": 2.7496008265239037e-05, "loss": 1.8289, "step": 5975, "task_loss": 1.2378538846969604 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.6196995973587036, "epoch": 5.05, "learning_rate": 2.7491312106696727e-05, "loss": 1.5102, "step": 5976, "task_loss": 2.0097298622131348 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.7491836547851562, "epoch": 5.05, "learning_rate": 2.748661594815441e-05, "loss": 1.5927, "step": 5977, "task_loss": 1.5157792568206787 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.9714813232421875, "epoch": 5.05, "learning_rate": 2.74819197896121e-05, "loss": 1.8374, "step": 5978, "task_loss": 1.2396095991134644 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.8989825248718262, "epoch": 5.05, "learning_rate": 2.7477223631069786e-05, "loss": 1.515, "step": 5979, "task_loss": 1.158367395401001 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.731797456741333, "epoch": 5.05, "learning_rate": 2.7472527472527476e-05, "loss": 1.1383, "step": 5980, "task_loss": 0.4815087914466858 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.4883168935775757, "epoch": 5.06, "learning_rate": 2.746783131398516e-05, "loss": 1.401, "step": 5981, "task_loss": 1.5791443586349487 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.933860421180725, "epoch": 5.06, "learning_rate": 2.746313515544285e-05, "loss": 1.5846, "step": 5982, "task_loss": 0.7577766180038452 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.3321192264556885, "epoch": 5.06, "learning_rate": 2.7458438996900538e-05, "loss": 1.3451, "step": 5983, "task_loss": 0.5090982913970947 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 2.0332798957824707, "epoch": 5.06, "learning_rate": 2.7453742838358225e-05, "loss": 2.1848, "step": 5984, "task_loss": 1.3390953540802002 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.4984904527664185, "epoch": 5.06, "learning_rate": 2.744904667981591e-05, "loss": 1.414, "step": 5985, "task_loss": 0.9739983081817627 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.6785483360290527, "epoch": 5.06, "learning_rate": 2.7444350521273597e-05, "loss": 1.4638, "step": 5986, "task_loss": 1.3667768239974976 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.669451117515564, "epoch": 5.06, "learning_rate": 2.7439654362731287e-05, "loss": 1.8552, "step": 5987, "task_loss": 1.5857038497924805 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.5099369287490845, "epoch": 5.06, "learning_rate": 2.7434958204188977e-05, "loss": 1.6199, "step": 5988, "task_loss": 1.3627605438232422 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.477861762046814, "epoch": 5.06, "learning_rate": 2.743026204564666e-05, "loss": 1.6036, "step": 5989, "task_loss": 1.4046006202697754 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.1466853618621826, "epoch": 5.06, "learning_rate": 2.742556588710435e-05, "loss": 1.4119, "step": 5990, "task_loss": 0.7919737100601196 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.1878602504730225, "epoch": 5.06, "learning_rate": 2.742086972856204e-05, "loss": 1.5422, "step": 5991, "task_loss": 1.6900066137313843 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6206276416778564, "epoch": 5.07, "learning_rate": 2.7416173570019726e-05, "loss": 1.4602, "step": 5992, "task_loss": 0.4422968327999115 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 2.222472667694092, "epoch": 5.07, "learning_rate": 2.7411477411477415e-05, "loss": 1.6409, "step": 5993, "task_loss": 1.4876128435134888 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 2.1480159759521484, "epoch": 5.07, "learning_rate": 2.7406781252935098e-05, "loss": 1.6129, "step": 5994, "task_loss": 1.3178582191467285 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.5628468990325928, "epoch": 5.07, "learning_rate": 2.7402085094392788e-05, "loss": 1.988, "step": 5995, "task_loss": 1.6689242124557495 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.068885087966919, "epoch": 5.07, "learning_rate": 2.7397388935850478e-05, "loss": 1.7041, "step": 5996, "task_loss": 0.7173780202865601 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.6105893850326538, "epoch": 5.07, "learning_rate": 2.7392692777308164e-05, "loss": 1.5997, "step": 5997, "task_loss": 1.1925582885742188 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.4162261486053467, "epoch": 5.07, "learning_rate": 2.738799661876585e-05, "loss": 1.5213, "step": 5998, "task_loss": 0.466021865606308 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.4770967960357666, "epoch": 5.07, "learning_rate": 2.7383300460223537e-05, "loss": 1.5651, "step": 5999, "task_loss": 0.6623186469078064 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 2.3965227603912354, "epoch": 5.07, "learning_rate": 2.7378604301681226e-05, "loss": 1.8848, "step": 6000, "task_loss": 1.2494666576385498 }, { "epoch": 5.07, "eval_accuracy": 0.8315247524752475, "eval_loss": 0.9807378053665161, "eval_runtime": 226.339, "eval_samples_per_second": 111.558, "eval_steps_per_second": 0.875, "step": 6000 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.7662501335144043, "epoch": 5.07, "learning_rate": 2.7373908143138916e-05, "loss": 1.3311, "step": 6001, "task_loss": 1.343724250793457 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.9522209167480469, "epoch": 5.07, "learning_rate": 2.73692119845966e-05, "loss": 1.9406, "step": 6002, "task_loss": 2.0613434314727783 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.7874780893325806, "epoch": 5.07, "learning_rate": 2.736451582605429e-05, "loss": 1.5376, "step": 6003, "task_loss": 1.2606388330459595 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.8563027381896973, "epoch": 5.08, "learning_rate": 2.7359819667511975e-05, "loss": 1.4796, "step": 6004, "task_loss": 1.0875974893569946 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.8217250108718872, "epoch": 5.08, "learning_rate": 2.7355123508969665e-05, "loss": 1.3016, "step": 6005, "task_loss": 0.9278236627578735 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.8831559419631958, "epoch": 5.08, "learning_rate": 2.7350427350427355e-05, "loss": 1.3962, "step": 6006, "task_loss": 1.3202184438705444 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.2185256481170654, "epoch": 5.08, "learning_rate": 2.7345731191885038e-05, "loss": 1.3391, "step": 6007, "task_loss": 1.0860174894332886 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.48492521047592163, "epoch": 5.08, "learning_rate": 2.7341035033342727e-05, "loss": 1.3083, "step": 6008, "task_loss": 1.0913361310958862 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 2.29097843170166, "epoch": 5.08, "learning_rate": 2.7336338874800417e-05, "loss": 1.6612, "step": 6009, "task_loss": 1.0779439210891724 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.831801414489746, "epoch": 5.08, "learning_rate": 2.7331642716258103e-05, "loss": 1.6818, "step": 6010, "task_loss": 1.0645065307617188 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 2.2680459022521973, "epoch": 5.08, "learning_rate": 2.7326946557715786e-05, "loss": 1.9825, "step": 6011, "task_loss": 2.25335693359375 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.501272201538086, "epoch": 5.08, "learning_rate": 2.7322250399173476e-05, "loss": 1.4031, "step": 6012, "task_loss": 1.037697434425354 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 2.006222724914551, "epoch": 5.08, "learning_rate": 2.7317554240631166e-05, "loss": 1.7654, "step": 6013, "task_loss": 1.8549330234527588 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.5703160762786865, "epoch": 5.08, "learning_rate": 2.7312858082088855e-05, "loss": 1.2716, "step": 6014, "task_loss": 0.6937095522880554 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.543492317199707, "epoch": 5.08, "learning_rate": 2.730816192354654e-05, "loss": 1.3936, "step": 6015, "task_loss": 0.9498792886734009 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.4141132831573486, "epoch": 5.09, "learning_rate": 2.7303465765004228e-05, "loss": 1.4084, "step": 6016, "task_loss": 2.1713035106658936 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 2.2215139865875244, "epoch": 5.09, "learning_rate": 2.7298769606461915e-05, "loss": 2.0117, "step": 6017, "task_loss": 1.5173630714416504 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.6084805727005005, "epoch": 5.09, "learning_rate": 2.7294073447919604e-05, "loss": 1.404, "step": 6018, "task_loss": 1.4337973594665527 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.8146912455558777, "epoch": 5.09, "learning_rate": 2.7289377289377287e-05, "loss": 1.2187, "step": 6019, "task_loss": 0.6366134881973267 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.15373694896698, "epoch": 5.09, "learning_rate": 2.7284681130834977e-05, "loss": 1.3174, "step": 6020, "task_loss": 0.638940691947937 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.65285062789917, "epoch": 5.09, "learning_rate": 2.7279984972292667e-05, "loss": 1.6437, "step": 6021, "task_loss": 1.9222254753112793 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.5105171203613281, "epoch": 5.09, "learning_rate": 2.7275288813750356e-05, "loss": 1.578, "step": 6022, "task_loss": 1.066010594367981 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.2227613925933838, "epoch": 5.09, "learning_rate": 2.7270592655208043e-05, "loss": 1.5835, "step": 6023, "task_loss": 1.7856481075286865 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.9503490924835205, "epoch": 5.09, "learning_rate": 2.7265896496665726e-05, "loss": 1.3087, "step": 6024, "task_loss": 0.6125936508178711 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.561194896697998, "epoch": 5.09, "learning_rate": 2.7261200338123415e-05, "loss": 1.2933, "step": 6025, "task_loss": 2.2164900302886963 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.7554807662963867, "epoch": 5.09, "learning_rate": 2.7256504179581105e-05, "loss": 1.5082, "step": 6026, "task_loss": 0.9767686724662781 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 2.0444374084472656, "epoch": 5.09, "learning_rate": 2.7251808021038795e-05, "loss": 1.464, "step": 6027, "task_loss": 1.131515383720398 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.6854791641235352, "epoch": 5.1, "learning_rate": 2.7247111862496478e-05, "loss": 1.471, "step": 6028, "task_loss": 2.2208142280578613 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.1964408159255981, "epoch": 5.1, "learning_rate": 2.7242415703954168e-05, "loss": 1.4521, "step": 6029, "task_loss": 0.7715833783149719 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.1415436267852783, "epoch": 5.1, "learning_rate": 2.7237719545411854e-05, "loss": 1.2644, "step": 6030, "task_loss": 1.5832778215408325 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.8606854677200317, "epoch": 5.1, "learning_rate": 2.7233023386869544e-05, "loss": 1.2504, "step": 6031, "task_loss": 0.8619833588600159 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.74061918258667, "epoch": 5.1, "learning_rate": 2.7228327228327227e-05, "loss": 1.406, "step": 6032, "task_loss": 1.0596593618392944 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.9357960224151611, "epoch": 5.1, "learning_rate": 2.7223631069784916e-05, "loss": 1.3556, "step": 6033, "task_loss": 0.7946279644966125 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.196561336517334, "epoch": 5.1, "learning_rate": 2.7218934911242606e-05, "loss": 0.958, "step": 6034, "task_loss": 0.9397717714309692 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.8972089290618896, "epoch": 5.1, "learning_rate": 2.7214238752700292e-05, "loss": 1.4384, "step": 6035, "task_loss": 0.46294689178466797 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.6775273084640503, "epoch": 5.1, "learning_rate": 2.720954259415798e-05, "loss": 1.4422, "step": 6036, "task_loss": 1.3346761465072632 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 2.0288169384002686, "epoch": 5.1, "learning_rate": 2.7204846435615665e-05, "loss": 1.4545, "step": 6037, "task_loss": 1.0529216527938843 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.247133493423462, "epoch": 5.1, "learning_rate": 2.7200150277073355e-05, "loss": 1.2885, "step": 6038, "task_loss": 1.5230854749679565 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.8417154550552368, "epoch": 5.1, "learning_rate": 2.7195454118531044e-05, "loss": 1.1793, "step": 6039, "task_loss": 1.413912296295166 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.3710811138153076, "epoch": 5.11, "learning_rate": 2.7190757959988734e-05, "loss": 1.0497, "step": 6040, "task_loss": 1.7059674263000488 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.5831513404846191, "epoch": 5.11, "learning_rate": 2.7186061801446417e-05, "loss": 1.4102, "step": 6041, "task_loss": 0.9912034273147583 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.6207759380340576, "epoch": 5.11, "learning_rate": 2.7181365642904107e-05, "loss": 1.8767, "step": 6042, "task_loss": 1.3208168745040894 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.2722163200378418, "epoch": 5.11, "learning_rate": 2.7176669484361793e-05, "loss": 1.4569, "step": 6043, "task_loss": 0.8121294975280762 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.565006971359253, "epoch": 5.11, "learning_rate": 2.7171973325819483e-05, "loss": 1.7376, "step": 6044, "task_loss": 1.5227748155593872 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.8481736183166504, "epoch": 5.11, "learning_rate": 2.7167277167277166e-05, "loss": 1.5595, "step": 6045, "task_loss": 0.9575451016426086 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.968367338180542, "epoch": 5.11, "learning_rate": 2.7162581008734856e-05, "loss": 1.2139, "step": 6046, "task_loss": 1.9066754579544067 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.1301653385162354, "epoch": 5.11, "learning_rate": 2.7157884850192545e-05, "loss": 1.1689, "step": 6047, "task_loss": 0.768576979637146 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.0179550647735596, "epoch": 5.11, "learning_rate": 2.715318869165023e-05, "loss": 1.2857, "step": 6048, "task_loss": 0.7878801226615906 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.3087983131408691, "epoch": 5.11, "learning_rate": 2.7148492533107918e-05, "loss": 1.1058, "step": 6049, "task_loss": 0.9538347721099854 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.0463907718658447, "epoch": 5.11, "learning_rate": 2.7143796374565604e-05, "loss": 1.1571, "step": 6050, "task_loss": 0.9137436151504517 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.4125372171401978, "epoch": 5.11, "learning_rate": 2.7139100216023294e-05, "loss": 1.4137, "step": 6051, "task_loss": 1.0317579507827759 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.9229708909988403, "epoch": 5.12, "learning_rate": 2.7134404057480984e-05, "loss": 1.1499, "step": 6052, "task_loss": 1.275350570678711 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.331318974494934, "epoch": 5.12, "learning_rate": 2.7129707898938674e-05, "loss": 1.1869, "step": 6053, "task_loss": 2.2413463592529297 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 2.223301410675049, "epoch": 5.12, "learning_rate": 2.7125011740396357e-05, "loss": 1.2375, "step": 6054, "task_loss": 1.8653202056884766 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 2.038719892501831, "epoch": 5.12, "learning_rate": 2.7120315581854043e-05, "loss": 1.4847, "step": 6055, "task_loss": 2.0970044136047363 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 2.0520880222320557, "epoch": 5.12, "learning_rate": 2.7115619423311733e-05, "loss": 1.3479, "step": 6056, "task_loss": 0.7605481147766113 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 2.2198703289031982, "epoch": 5.12, "learning_rate": 2.7110923264769422e-05, "loss": 1.5342, "step": 6057, "task_loss": 1.539783000946045 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.760793924331665, "epoch": 5.12, "learning_rate": 2.7106227106227105e-05, "loss": 1.4329, "step": 6058, "task_loss": 1.3907667398452759 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.0877609252929688, "epoch": 5.12, "learning_rate": 2.7101530947684795e-05, "loss": 1.2575, "step": 6059, "task_loss": 1.697043538093567 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.1456019878387451, "epoch": 5.12, "learning_rate": 2.7096834789142485e-05, "loss": 1.3013, "step": 6060, "task_loss": 0.39524057507514954 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7074329853057861, "epoch": 5.12, "learning_rate": 2.709213863060017e-05, "loss": 1.1546, "step": 6061, "task_loss": 1.4160292148590088 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.3175756931304932, "epoch": 5.12, "learning_rate": 2.7087442472057854e-05, "loss": 0.9868, "step": 6062, "task_loss": 0.5832731127738953 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.1958972215652466, "epoch": 5.13, "learning_rate": 2.7082746313515544e-05, "loss": 1.248, "step": 6063, "task_loss": 0.5755500793457031 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.2761794328689575, "epoch": 5.13, "learning_rate": 2.7078050154973233e-05, "loss": 1.1467, "step": 6064, "task_loss": 0.6066094040870667 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.9923672676086426, "epoch": 5.13, "learning_rate": 2.7073353996430923e-05, "loss": 1.2313, "step": 6065, "task_loss": 1.2634145021438599 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.806491732597351, "epoch": 5.13, "learning_rate": 2.7068657837888606e-05, "loss": 1.4147, "step": 6066, "task_loss": 1.430079460144043 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.8822911977767944, "epoch": 5.13, "learning_rate": 2.7063961679346296e-05, "loss": 1.422, "step": 6067, "task_loss": 1.0056214332580566 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.966623067855835, "epoch": 5.13, "learning_rate": 2.7059265520803982e-05, "loss": 1.4988, "step": 6068, "task_loss": 1.0962300300598145 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.99144446849823, "epoch": 5.13, "learning_rate": 2.7054569362261672e-05, "loss": 1.3089, "step": 6069, "task_loss": 0.6787312626838684 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.3439979553222656, "epoch": 5.13, "learning_rate": 2.704987320371936e-05, "loss": 1.5178, "step": 6070, "task_loss": 1.4543005228042603 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.8243721723556519, "epoch": 5.13, "learning_rate": 2.7045177045177045e-05, "loss": 1.6764, "step": 6071, "task_loss": 2.1928622722625732 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.2107899188995361, "epoch": 5.13, "learning_rate": 2.7040480886634734e-05, "loss": 1.5046, "step": 6072, "task_loss": 1.5262925624847412 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7872982025146484, "epoch": 5.13, "learning_rate": 2.7035784728092424e-05, "loss": 1.1615, "step": 6073, "task_loss": 0.08315528929233551 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.0898466110229492, "epoch": 5.13, "learning_rate": 2.703108856955011e-05, "loss": 1.4369, "step": 6074, "task_loss": 0.39144861698150635 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.5850605964660645, "epoch": 5.14, "learning_rate": 2.7026392411007793e-05, "loss": 1.4368, "step": 6075, "task_loss": 1.384130835533142 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.100174903869629, "epoch": 5.14, "learning_rate": 2.7021696252465483e-05, "loss": 1.1585, "step": 6076, "task_loss": 1.4277344942092896 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.9928566217422485, "epoch": 5.14, "learning_rate": 2.7017000093923173e-05, "loss": 1.4928, "step": 6077, "task_loss": 2.2164745330810547 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.842015027999878, "epoch": 5.14, "learning_rate": 2.7012303935380863e-05, "loss": 1.7829, "step": 6078, "task_loss": 1.5232429504394531 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.9228453636169434, "epoch": 5.14, "learning_rate": 2.7007607776838545e-05, "loss": 1.0075, "step": 6079, "task_loss": 0.9605417251586914 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.9140515923500061, "epoch": 5.14, "learning_rate": 2.7002911618296235e-05, "loss": 1.1614, "step": 6080, "task_loss": 1.0511114597320557 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.028151512145996, "epoch": 5.14, "learning_rate": 2.699821545975392e-05, "loss": 1.5562, "step": 6081, "task_loss": 0.42041832208633423 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.8639107942581177, "epoch": 5.14, "learning_rate": 2.699351930121161e-05, "loss": 1.2336, "step": 6082, "task_loss": 0.6014939546585083 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.9981849789619446, "epoch": 5.14, "learning_rate": 2.69888231426693e-05, "loss": 1.1848, "step": 6083, "task_loss": 0.7643138766288757 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.9919228553771973, "epoch": 5.14, "learning_rate": 2.6984126984126984e-05, "loss": 1.1664, "step": 6084, "task_loss": 0.6952190399169922 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.5592262744903564, "epoch": 5.14, "learning_rate": 2.6979430825584674e-05, "loss": 1.4078, "step": 6085, "task_loss": 1.4757106304168701 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.7650840282440186, "epoch": 5.14, "learning_rate": 2.6974734667042363e-05, "loss": 1.6725, "step": 6086, "task_loss": 0.8313434720039368 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.9213629961013794, "epoch": 5.15, "learning_rate": 2.697003850850005e-05, "loss": 1.5162, "step": 6087, "task_loss": 0.77763432264328 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.8686922788619995, "epoch": 5.15, "learning_rate": 2.6965342349957733e-05, "loss": 1.5868, "step": 6088, "task_loss": 1.0385090112686157 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.2186870574951172, "epoch": 5.15, "learning_rate": 2.6960646191415422e-05, "loss": 1.0529, "step": 6089, "task_loss": 0.7597925066947937 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.56624436378479, "epoch": 5.15, "learning_rate": 2.6955950032873112e-05, "loss": 1.4093, "step": 6090, "task_loss": 1.1457046270370483 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.48423171043396, "epoch": 5.15, "learning_rate": 2.6951253874330802e-05, "loss": 1.1017, "step": 6091, "task_loss": 1.1203498840332031 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.7660644054412842, "epoch": 5.15, "learning_rate": 2.6946557715788485e-05, "loss": 1.2766, "step": 6092, "task_loss": 1.1163121461868286 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.3043910264968872, "epoch": 5.15, "learning_rate": 2.6941861557246175e-05, "loss": 1.2075, "step": 6093, "task_loss": 1.3990834951400757 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.6476186513900757, "epoch": 5.15, "learning_rate": 2.693716539870386e-05, "loss": 1.3961, "step": 6094, "task_loss": 1.104543924331665 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.882048487663269, "epoch": 5.15, "learning_rate": 2.693246924016155e-05, "loss": 1.1462, "step": 6095, "task_loss": 1.5248403549194336 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.2639052867889404, "epoch": 5.15, "learning_rate": 2.6927773081619234e-05, "loss": 1.5688, "step": 6096, "task_loss": 1.4257371425628662 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.3533936738967896, "epoch": 5.15, "learning_rate": 2.6923076923076923e-05, "loss": 1.2572, "step": 6097, "task_loss": 1.4637911319732666 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.8188688158988953, "epoch": 5.15, "learning_rate": 2.6918380764534613e-05, "loss": 1.1332, "step": 6098, "task_loss": 1.5603359937667847 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.67162024974823, "epoch": 5.16, "learning_rate": 2.69136846059923e-05, "loss": 1.251, "step": 6099, "task_loss": 1.4481875896453857 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.2423639297485352, "epoch": 5.16, "learning_rate": 2.690898844744999e-05, "loss": 1.3194, "step": 6100, "task_loss": 0.5610933899879456 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.1068581342697144, "epoch": 5.16, "learning_rate": 2.6904292288907672e-05, "loss": 1.1608, "step": 6101, "task_loss": 3.1303048133850098 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.431076169013977, "epoch": 5.16, "learning_rate": 2.6899596130365362e-05, "loss": 1.128, "step": 6102, "task_loss": 0.6214473247528076 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.376178503036499, "epoch": 5.16, "learning_rate": 2.689489997182305e-05, "loss": 1.2766, "step": 6103, "task_loss": 0.6475787162780762 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.2743505239486694, "epoch": 5.16, "learning_rate": 2.689020381328074e-05, "loss": 1.1901, "step": 6104, "task_loss": 0.7211982607841492 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.5053305625915527, "epoch": 5.16, "learning_rate": 2.6885507654738424e-05, "loss": 1.4493, "step": 6105, "task_loss": 0.6894542574882507 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.5276148319244385, "epoch": 5.16, "learning_rate": 2.688081149619611e-05, "loss": 1.1566, "step": 6106, "task_loss": 1.2581615447998047 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.0961766242980957, "epoch": 5.16, "learning_rate": 2.68761153376538e-05, "loss": 1.5164, "step": 6107, "task_loss": 0.9878286123275757 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5335888862609863, "epoch": 5.16, "learning_rate": 2.687141917911149e-05, "loss": 0.9308, "step": 6108, "task_loss": 0.4749242663383484 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.2188602685928345, "epoch": 5.16, "learning_rate": 2.6866723020569173e-05, "loss": 1.1145, "step": 6109, "task_loss": 1.1711950302124023 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.4144906997680664, "epoch": 5.16, "learning_rate": 2.6862026862026863e-05, "loss": 1.5315, "step": 6110, "task_loss": 1.0388909578323364 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.6925595998764038, "epoch": 5.17, "learning_rate": 2.6857330703484552e-05, "loss": 1.5303, "step": 6111, "task_loss": 1.4400750398635864 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.8189839124679565, "epoch": 5.17, "learning_rate": 2.685263454494224e-05, "loss": 1.1633, "step": 6112, "task_loss": 1.469436764717102 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.7965493202209473, "epoch": 5.17, "learning_rate": 2.6847938386399925e-05, "loss": 1.2321, "step": 6113, "task_loss": 1.2704484462738037 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.5181885957717896, "epoch": 5.17, "learning_rate": 2.684324222785761e-05, "loss": 1.3499, "step": 6114, "task_loss": 1.2870570421218872 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.4461888074874878, "epoch": 5.17, "learning_rate": 2.68385460693153e-05, "loss": 1.3922, "step": 6115, "task_loss": 1.0139142274856567 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6951039433479309, "epoch": 5.17, "learning_rate": 2.683384991077299e-05, "loss": 1.1581, "step": 6116, "task_loss": 0.37230202555656433 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.3888874053955078, "epoch": 5.17, "learning_rate": 2.682915375223068e-05, "loss": 1.2628, "step": 6117, "task_loss": 0.3806201219558716 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.343388319015503, "epoch": 5.17, "learning_rate": 2.6824457593688364e-05, "loss": 1.3597, "step": 6118, "task_loss": 1.174654483795166 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.825045108795166, "epoch": 5.17, "learning_rate": 2.681976143514605e-05, "loss": 1.1432, "step": 6119, "task_loss": 1.1517502069473267 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.0110048055648804, "epoch": 5.17, "learning_rate": 2.681506527660374e-05, "loss": 1.1355, "step": 6120, "task_loss": 0.5486020445823669 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.7390542030334473, "epoch": 5.17, "learning_rate": 2.681036911806143e-05, "loss": 1.4507, "step": 6121, "task_loss": 2.345353364944458 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.8099324107170105, "epoch": 5.17, "learning_rate": 2.6805672959519112e-05, "loss": 1.1678, "step": 6122, "task_loss": 0.7387697100639343 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.5662109851837158, "epoch": 5.18, "learning_rate": 2.6800976800976802e-05, "loss": 1.416, "step": 6123, "task_loss": 1.1730748414993286 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.937831461429596, "epoch": 5.18, "learning_rate": 2.6796280642434492e-05, "loss": 1.161, "step": 6124, "task_loss": 1.0569132566452026 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.4459607601165771, "epoch": 5.18, "learning_rate": 2.6791584483892178e-05, "loss": 1.2346, "step": 6125, "task_loss": 1.0594303607940674 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.2699089050292969, "epoch": 5.18, "learning_rate": 2.678688832534986e-05, "loss": 1.0665, "step": 6126, "task_loss": 0.6816018223762512 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.1862905025482178, "epoch": 5.18, "learning_rate": 2.678219216680755e-05, "loss": 1.2084, "step": 6127, "task_loss": 0.269861102104187 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.633484959602356, "epoch": 5.18, "learning_rate": 2.677749600826524e-05, "loss": 1.9365, "step": 6128, "task_loss": 0.8539460897445679 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5797567367553711, "epoch": 5.18, "learning_rate": 2.677279984972293e-05, "loss": 1.0908, "step": 6129, "task_loss": 0.884101390838623 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.1826082468032837, "epoch": 5.18, "learning_rate": 2.6768103691180617e-05, "loss": 1.1799, "step": 6130, "task_loss": 1.1301647424697876 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.1413825750350952, "epoch": 5.18, "learning_rate": 2.6763407532638303e-05, "loss": 1.2428, "step": 6131, "task_loss": 1.1832886934280396 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.9571319818496704, "epoch": 5.18, "learning_rate": 2.675871137409599e-05, "loss": 1.109, "step": 6132, "task_loss": 1.0448585748672485 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.1077163219451904, "epoch": 5.18, "learning_rate": 2.675401521555368e-05, "loss": 1.0242, "step": 6133, "task_loss": 0.5394262671470642 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.4249165058135986, "epoch": 5.19, "learning_rate": 2.674931905701137e-05, "loss": 1.4015, "step": 6134, "task_loss": 2.117480754852295 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.258486032485962, "epoch": 5.19, "learning_rate": 2.674462289846905e-05, "loss": 1.1072, "step": 6135, "task_loss": 0.5978756546974182 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7848345041275024, "epoch": 5.19, "learning_rate": 2.673992673992674e-05, "loss": 0.7429, "step": 6136, "task_loss": 0.8130994439125061 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.338639497756958, "epoch": 5.19, "learning_rate": 2.673523058138443e-05, "loss": 1.0678, "step": 6137, "task_loss": 1.8277900218963623 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.6257516145706177, "epoch": 5.19, "learning_rate": 2.6730534422842117e-05, "loss": 1.0829, "step": 6138, "task_loss": 1.0870119333267212 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.579148292541504, "epoch": 5.19, "learning_rate": 2.67258382642998e-05, "loss": 1.3776, "step": 6139, "task_loss": 0.5376133918762207 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.2613327503204346, "epoch": 5.19, "learning_rate": 2.672114210575749e-05, "loss": 1.1508, "step": 6140, "task_loss": 0.6467357873916626 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 2.401784896850586, "epoch": 5.19, "learning_rate": 2.671644594721518e-05, "loss": 1.3999, "step": 6141, "task_loss": 1.8623753786087036 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.8652021288871765, "epoch": 5.19, "learning_rate": 2.671174978867287e-05, "loss": 1.1823, "step": 6142, "task_loss": 0.4130382239818573 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7996633052825928, "epoch": 5.19, "learning_rate": 2.6707053630130553e-05, "loss": 1.1557, "step": 6143, "task_loss": 0.7167235016822815 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.8029072284698486, "epoch": 5.19, "learning_rate": 2.6702357471588242e-05, "loss": 1.1946, "step": 6144, "task_loss": 1.0476367473602295 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.1070213317871094, "epoch": 5.19, "learning_rate": 2.669766131304593e-05, "loss": 1.1934, "step": 6145, "task_loss": 0.9667125940322876 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.5162668228149414, "epoch": 5.2, "learning_rate": 2.669296515450362e-05, "loss": 1.284, "step": 6146, "task_loss": 1.616597056388855 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.201389193534851, "epoch": 5.2, "learning_rate": 2.6688268995961308e-05, "loss": 1.0976, "step": 6147, "task_loss": 0.787985622882843 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7497252225875854, "epoch": 5.2, "learning_rate": 2.668357283741899e-05, "loss": 0.9701, "step": 6148, "task_loss": 1.0974105596542358 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.155273675918579, "epoch": 5.2, "learning_rate": 2.667887667887668e-05, "loss": 0.9958, "step": 6149, "task_loss": 0.6040500998497009 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.132684588432312, "epoch": 5.2, "learning_rate": 2.6674180520334367e-05, "loss": 1.0203, "step": 6150, "task_loss": 0.9335824251174927 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6399801969528198, "epoch": 5.2, "learning_rate": 2.6669484361792057e-05, "loss": 0.9643, "step": 6151, "task_loss": 0.6210339069366455 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.6683968305587769, "epoch": 5.2, "learning_rate": 2.666478820324974e-05, "loss": 1.4203, "step": 6152, "task_loss": 0.7052663564682007 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.6595520973205566, "epoch": 5.2, "learning_rate": 2.666009204470743e-05, "loss": 1.2996, "step": 6153, "task_loss": 2.007265329360962 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.6942312717437744, "epoch": 5.2, "learning_rate": 2.665539588616512e-05, "loss": 1.3325, "step": 6154, "task_loss": 1.0949172973632812 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.6002906560897827, "epoch": 5.2, "learning_rate": 2.665069972762281e-05, "loss": 1.0721, "step": 6155, "task_loss": 1.6846591234207153 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.3470604419708252, "epoch": 5.2, "learning_rate": 2.6646003569080492e-05, "loss": 1.263, "step": 6156, "task_loss": 1.3509641885757446 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.2867796421051025, "epoch": 5.2, "learning_rate": 2.6641307410538178e-05, "loss": 1.2219, "step": 6157, "task_loss": 1.1966526508331299 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.2978180646896362, "epoch": 5.21, "learning_rate": 2.6636611251995868e-05, "loss": 1.2496, "step": 6158, "task_loss": 1.253466248512268 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.1556897163391113, "epoch": 5.21, "learning_rate": 2.6631915093453558e-05, "loss": 1.2869, "step": 6159, "task_loss": 1.6497784852981567 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.9617151021957397, "epoch": 5.21, "learning_rate": 2.6627218934911247e-05, "loss": 1.0671, "step": 6160, "task_loss": 1.0358563661575317 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.1619236469268799, "epoch": 5.21, "learning_rate": 2.662252277636893e-05, "loss": 1.4642, "step": 6161, "task_loss": 0.667813777923584 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.603804349899292, "epoch": 5.21, "learning_rate": 2.661782661782662e-05, "loss": 1.2215, "step": 6162, "task_loss": 2.0797836780548096 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.4060077667236328, "epoch": 5.21, "learning_rate": 2.6613130459284306e-05, "loss": 1.4375, "step": 6163, "task_loss": 1.161781668663025 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.009763240814209, "epoch": 5.21, "learning_rate": 2.6608434300741996e-05, "loss": 0.9579, "step": 6164, "task_loss": 0.9952691793441772 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.0181652307510376, "epoch": 5.21, "learning_rate": 2.660373814219968e-05, "loss": 1.0573, "step": 6165, "task_loss": 1.2562041282653809 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.1134276390075684, "epoch": 5.21, "learning_rate": 2.659904198365737e-05, "loss": 0.9777, "step": 6166, "task_loss": 1.0121058225631714 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.0962872505187988, "epoch": 5.21, "learning_rate": 2.659434582511506e-05, "loss": 1.3992, "step": 6167, "task_loss": 0.6822119951248169 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.163713812828064, "epoch": 5.21, "learning_rate": 2.658964966657275e-05, "loss": 1.0107, "step": 6168, "task_loss": 1.0314366817474365 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7791461944580078, "epoch": 5.21, "learning_rate": 2.658495350803043e-05, "loss": 1.2019, "step": 6169, "task_loss": 0.672261118888855 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7127944827079773, "epoch": 5.22, "learning_rate": 2.6580257349488118e-05, "loss": 0.9899, "step": 6170, "task_loss": 0.8570666909217834 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.4738926887512207, "epoch": 5.22, "learning_rate": 2.6575561190945807e-05, "loss": 1.2977, "step": 6171, "task_loss": 1.523746132850647 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.8724461793899536, "epoch": 5.22, "learning_rate": 2.6570865032403497e-05, "loss": 1.1183, "step": 6172, "task_loss": 0.9286195635795593 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.0347788333892822, "epoch": 5.22, "learning_rate": 2.656616887386118e-05, "loss": 1.3287, "step": 6173, "task_loss": 0.5948042273521423 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.8637802600860596, "epoch": 5.22, "learning_rate": 2.656147271531887e-05, "loss": 1.2724, "step": 6174, "task_loss": 1.007073998451233 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.1761953830718994, "epoch": 5.22, "learning_rate": 2.655677655677656e-05, "loss": 1.0793, "step": 6175, "task_loss": 0.9497080445289612 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.0064382553100586, "epoch": 5.22, "learning_rate": 2.6552080398234246e-05, "loss": 1.1906, "step": 6176, "task_loss": 0.7407934069633484 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.9420294761657715, "epoch": 5.22, "learning_rate": 2.6547384239691936e-05, "loss": 1.1066, "step": 6177, "task_loss": 0.9494105577468872 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.763310432434082, "epoch": 5.22, "learning_rate": 2.654268808114962e-05, "loss": 0.7693, "step": 6178, "task_loss": 1.0830469131469727 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.5182173252105713, "epoch": 5.22, "learning_rate": 2.6537991922607308e-05, "loss": 1.2633, "step": 6179, "task_loss": 1.0931538343429565 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.8743488192558289, "epoch": 5.22, "learning_rate": 2.6533295764064998e-05, "loss": 1.0093, "step": 6180, "task_loss": 0.5811735391616821 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.0481631755828857, "epoch": 5.22, "learning_rate": 2.6528599605522688e-05, "loss": 1.3282, "step": 6181, "task_loss": 0.9845531582832336 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.8869565725326538, "epoch": 5.23, "learning_rate": 2.652390344698037e-05, "loss": 1.126, "step": 6182, "task_loss": 1.1474261283874512 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.761346459388733, "epoch": 5.23, "learning_rate": 2.6519207288438057e-05, "loss": 1.2608, "step": 6183, "task_loss": 2.7264761924743652 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.5554115772247314, "epoch": 5.23, "learning_rate": 2.6514511129895747e-05, "loss": 1.132, "step": 6184, "task_loss": 0.7846354246139526 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.6136316061019897, "epoch": 5.23, "learning_rate": 2.6509814971353436e-05, "loss": 1.3493, "step": 6185, "task_loss": 1.4481470584869385 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.132309913635254, "epoch": 5.23, "learning_rate": 2.650511881281112e-05, "loss": 1.1932, "step": 6186, "task_loss": 0.976943850517273 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.6539921760559082, "epoch": 5.23, "learning_rate": 2.650042265426881e-05, "loss": 1.6107, "step": 6187, "task_loss": 1.3986767530441284 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.7405858039855957, "epoch": 5.23, "learning_rate": 2.64957264957265e-05, "loss": 1.3736, "step": 6188, "task_loss": 1.4649220705032349 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.0874395370483398, "epoch": 5.23, "learning_rate": 2.6491030337184185e-05, "loss": 1.2465, "step": 6189, "task_loss": 1.2736620903015137 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.33677077293396, "epoch": 5.23, "learning_rate": 2.6486334178641868e-05, "loss": 0.9569, "step": 6190, "task_loss": 0.3898025453090668 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.1996185779571533, "epoch": 5.23, "learning_rate": 2.6481638020099558e-05, "loss": 1.1106, "step": 6191, "task_loss": 1.4132800102233887 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.5369480848312378, "epoch": 5.23, "learning_rate": 2.6476941861557248e-05, "loss": 1.4528, "step": 6192, "task_loss": 1.032072901725769 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.3576630353927612, "epoch": 5.23, "learning_rate": 2.6472245703014937e-05, "loss": 1.1274, "step": 6193, "task_loss": 1.2482482194900513 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.205902099609375, "epoch": 5.24, "learning_rate": 2.6467549544472624e-05, "loss": 1.6272, "step": 6194, "task_loss": 0.7223795056343079 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.6535296440124512, "epoch": 5.24, "learning_rate": 2.646285338593031e-05, "loss": 1.3122, "step": 6195, "task_loss": 1.471187949180603 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.2265735864639282, "epoch": 5.24, "learning_rate": 2.6458157227387996e-05, "loss": 0.9638, "step": 6196, "task_loss": 1.6015468835830688 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.218266248703003, "epoch": 5.24, "learning_rate": 2.6453461068845686e-05, "loss": 1.1289, "step": 6197, "task_loss": 0.9043564796447754 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 2.0610432624816895, "epoch": 5.24, "learning_rate": 2.6448764910303376e-05, "loss": 1.2271, "step": 6198, "task_loss": 0.833638072013855 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.8974314332008362, "epoch": 5.24, "learning_rate": 2.644406875176106e-05, "loss": 1.0875, "step": 6199, "task_loss": 0.6530605554580688 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.851225733757019, "epoch": 5.24, "learning_rate": 2.643937259321875e-05, "loss": 1.0794, "step": 6200, "task_loss": 0.8912054300308228 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.969151496887207, "epoch": 5.24, "learning_rate": 2.6434676434676435e-05, "loss": 1.4437, "step": 6201, "task_loss": 0.7171374559402466 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.1624693870544434, "epoch": 5.24, "learning_rate": 2.6429980276134125e-05, "loss": 1.0727, "step": 6202, "task_loss": 1.8397388458251953 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.9696145057678223, "epoch": 5.24, "learning_rate": 2.6425284117591807e-05, "loss": 1.1474, "step": 6203, "task_loss": 0.941592276096344 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6954988837242126, "epoch": 5.24, "learning_rate": 2.6420587959049497e-05, "loss": 1.0299, "step": 6204, "task_loss": 0.6937874555587769 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4483092427253723, "epoch": 5.24, "learning_rate": 2.6415891800507187e-05, "loss": 0.9922, "step": 6205, "task_loss": 0.3380734622478485 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.5732932090759277, "epoch": 5.25, "learning_rate": 2.6411195641964877e-05, "loss": 1.312, "step": 6206, "task_loss": 1.6250627040863037 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.2279834747314453, "epoch": 5.25, "learning_rate": 2.6406499483422563e-05, "loss": 1.3742, "step": 6207, "task_loss": 1.3332117795944214 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.8850380182266235, "epoch": 5.25, "learning_rate": 2.640180332488025e-05, "loss": 1.0146, "step": 6208, "task_loss": 0.8966948986053467 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.8743408918380737, "epoch": 5.25, "learning_rate": 2.6397107166337936e-05, "loss": 1.2203, "step": 6209, "task_loss": 0.6471081376075745 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.928680956363678, "epoch": 5.25, "learning_rate": 2.6392411007795625e-05, "loss": 0.9443, "step": 6210, "task_loss": 1.6760292053222656 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.535560131072998, "epoch": 5.25, "learning_rate": 2.6387714849253315e-05, "loss": 1.5162, "step": 6211, "task_loss": 0.9994226098060608 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 3.1606507301330566, "epoch": 5.25, "learning_rate": 2.6383018690710998e-05, "loss": 1.6196, "step": 6212, "task_loss": 1.8813225030899048 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7509488463401794, "epoch": 5.25, "learning_rate": 2.6378322532168688e-05, "loss": 1.1811, "step": 6213, "task_loss": 0.18218758702278137 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.429240107536316, "epoch": 5.25, "learning_rate": 2.6373626373626374e-05, "loss": 1.0223, "step": 6214, "task_loss": 1.5722639560699463 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.2544280290603638, "epoch": 5.25, "learning_rate": 2.6368930215084064e-05, "loss": 1.469, "step": 6215, "task_loss": 2.005526542663574 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.5714510679244995, "epoch": 5.25, "learning_rate": 2.6364234056541747e-05, "loss": 1.2638, "step": 6216, "task_loss": 1.1345816850662231 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.0270071029663086, "epoch": 5.26, "learning_rate": 2.6359537897999437e-05, "loss": 1.1066, "step": 6217, "task_loss": 0.5646890997886658 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7078343033790588, "epoch": 5.26, "learning_rate": 2.6354841739457126e-05, "loss": 1.1184, "step": 6218, "task_loss": 0.40160274505615234 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6631669998168945, "epoch": 5.26, "learning_rate": 2.6350145580914816e-05, "loss": 1.0835, "step": 6219, "task_loss": 0.4133947193622589 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.4783446788787842, "epoch": 5.26, "learning_rate": 2.63454494223725e-05, "loss": 1.3532, "step": 6220, "task_loss": 1.4772411584854126 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.4749538898468018, "epoch": 5.26, "learning_rate": 2.6340753263830185e-05, "loss": 1.2705, "step": 6221, "task_loss": 0.6059532165527344 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.7464972734451294, "epoch": 5.26, "learning_rate": 2.6336057105287875e-05, "loss": 1.2724, "step": 6222, "task_loss": 0.8676250576972961 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.0208845138549805, "epoch": 5.26, "learning_rate": 2.6331360946745565e-05, "loss": 0.9391, "step": 6223, "task_loss": 0.611379086971283 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.2742042541503906, "epoch": 5.26, "learning_rate": 2.6326664788203254e-05, "loss": 1.3407, "step": 6224, "task_loss": 0.6895625591278076 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.5953598022460938, "epoch": 5.26, "learning_rate": 2.6321968629660937e-05, "loss": 1.2846, "step": 6225, "task_loss": 1.449446201324463 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.0208227634429932, "epoch": 5.26, "learning_rate": 2.6317272471118627e-05, "loss": 1.034, "step": 6226, "task_loss": 0.30685529112815857 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5127379894256592, "epoch": 5.26, "learning_rate": 2.6312576312576314e-05, "loss": 1.3577, "step": 6227, "task_loss": 0.4128589928150177 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.7353520393371582, "epoch": 5.26, "learning_rate": 2.6307880154034003e-05, "loss": 1.2173, "step": 6228, "task_loss": 2.3256077766418457 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.039460301399231, "epoch": 5.27, "learning_rate": 2.6303183995491686e-05, "loss": 1.1937, "step": 6229, "task_loss": 0.7884426712989807 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.445837140083313, "epoch": 5.27, "learning_rate": 2.6298487836949376e-05, "loss": 1.6233, "step": 6230, "task_loss": 1.121649146080017 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.1477904319763184, "epoch": 5.27, "learning_rate": 2.6293791678407066e-05, "loss": 1.0992, "step": 6231, "task_loss": 0.87971031665802 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6022188067436218, "epoch": 5.27, "learning_rate": 2.6289095519864755e-05, "loss": 1.1678, "step": 6232, "task_loss": 0.31204754114151 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.944837212562561, "epoch": 5.27, "learning_rate": 2.628439936132244e-05, "loss": 1.1334, "step": 6233, "task_loss": 1.4712275266647339 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.1013222932815552, "epoch": 5.27, "learning_rate": 2.6279703202780125e-05, "loss": 1.2401, "step": 6234, "task_loss": 1.4451813697814941 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.082606554031372, "epoch": 5.27, "learning_rate": 2.6275007044237814e-05, "loss": 1.2169, "step": 6235, "task_loss": 1.1276285648345947 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.7242954969406128, "epoch": 5.27, "learning_rate": 2.6270310885695504e-05, "loss": 1.3732, "step": 6236, "task_loss": 1.0223360061645508 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.33125901222229, "epoch": 5.27, "learning_rate": 2.6265614727153194e-05, "loss": 1.2937, "step": 6237, "task_loss": 0.883307158946991 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.3754355907440186, "epoch": 5.27, "learning_rate": 2.6260918568610877e-05, "loss": 1.1066, "step": 6238, "task_loss": 1.233985185623169 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.0425667762756348, "epoch": 5.27, "learning_rate": 2.6256222410068567e-05, "loss": 0.8587, "step": 6239, "task_loss": 0.6798689961433411 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.1578224897384644, "epoch": 5.27, "learning_rate": 2.6251526251526253e-05, "loss": 1.0135, "step": 6240, "task_loss": 1.5707179307937622 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.7140103578567505, "epoch": 5.28, "learning_rate": 2.6246830092983943e-05, "loss": 1.0976, "step": 6241, "task_loss": 2.9521336555480957 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.9706842303276062, "epoch": 5.28, "learning_rate": 2.6242133934441626e-05, "loss": 0.9612, "step": 6242, "task_loss": 1.531694769859314 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.0437289476394653, "epoch": 5.28, "learning_rate": 2.6237437775899315e-05, "loss": 1.0858, "step": 6243, "task_loss": 1.9825717210769653 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7636774778366089, "epoch": 5.28, "learning_rate": 2.6232741617357005e-05, "loss": 1.2448, "step": 6244, "task_loss": 0.8850448727607727 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.923209011554718, "epoch": 5.28, "learning_rate": 2.622804545881469e-05, "loss": 1.2228, "step": 6245, "task_loss": 1.6893441677093506 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.2808351516723633, "epoch": 5.28, "learning_rate": 2.6223349300272378e-05, "loss": 1.2283, "step": 6246, "task_loss": 1.5021425485610962 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.0824205875396729, "epoch": 5.28, "learning_rate": 2.6218653141730064e-05, "loss": 0.833, "step": 6247, "task_loss": 0.9099451899528503 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.6898624897003174, "epoch": 5.28, "learning_rate": 2.6213956983187754e-05, "loss": 1.0686, "step": 6248, "task_loss": 0.8602155447006226 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.0620982646942139, "epoch": 5.28, "learning_rate": 2.6209260824645443e-05, "loss": 0.9658, "step": 6249, "task_loss": 1.9382007122039795 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.317171573638916, "epoch": 5.28, "learning_rate": 2.6204564666103126e-05, "loss": 1.0456, "step": 6250, "task_loss": 1.481420874595642 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.8809815049171448, "epoch": 5.28, "learning_rate": 2.6199868507560816e-05, "loss": 1.1278, "step": 6251, "task_loss": 1.2659659385681152 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.9375021457672119, "epoch": 5.28, "learning_rate": 2.6195172349018502e-05, "loss": 1.2362, "step": 6252, "task_loss": 1.1262375116348267 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7712739109992981, "epoch": 5.29, "learning_rate": 2.6190476190476192e-05, "loss": 0.8891, "step": 6253, "task_loss": 0.4744390845298767 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.9102873802185059, "epoch": 5.29, "learning_rate": 2.6185780031933882e-05, "loss": 1.0323, "step": 6254, "task_loss": 0.2178632766008377 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.329350233078003, "epoch": 5.29, "learning_rate": 2.6181083873391565e-05, "loss": 1.1285, "step": 6255, "task_loss": 1.2384041547775269 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.0683484077453613, "epoch": 5.29, "learning_rate": 2.6176387714849255e-05, "loss": 0.9984, "step": 6256, "task_loss": 1.2279257774353027 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.657471239566803, "epoch": 5.29, "learning_rate": 2.6171691556306944e-05, "loss": 0.9212, "step": 6257, "task_loss": 0.5140358805656433 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.202399730682373, "epoch": 5.29, "learning_rate": 2.616699539776463e-05, "loss": 1.3406, "step": 6258, "task_loss": 1.6157007217407227 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7269793152809143, "epoch": 5.29, "learning_rate": 2.6162299239222317e-05, "loss": 1.1243, "step": 6259, "task_loss": 0.5015912652015686 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7032637596130371, "epoch": 5.29, "learning_rate": 2.6157603080680003e-05, "loss": 1.1415, "step": 6260, "task_loss": 1.0667850971221924 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.8244988918304443, "epoch": 5.29, "learning_rate": 2.6152906922137693e-05, "loss": 1.1015, "step": 6261, "task_loss": 1.6181848049163818 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.7789819240570068, "epoch": 5.29, "learning_rate": 2.6148210763595383e-05, "loss": 1.1612, "step": 6262, "task_loss": 1.3308385610580444 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.1467335224151611, "epoch": 5.29, "learning_rate": 2.6143514605053066e-05, "loss": 1.115, "step": 6263, "task_loss": 0.5159995555877686 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.1943199634552002, "epoch": 5.29, "learning_rate": 2.6138818446510756e-05, "loss": 1.1475, "step": 6264, "task_loss": 1.0690507888793945 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.1759699583053589, "epoch": 5.3, "learning_rate": 2.6134122287968442e-05, "loss": 1.2338, "step": 6265, "task_loss": 0.7229683995246887 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.8486177921295166, "epoch": 5.3, "learning_rate": 2.612942612942613e-05, "loss": 0.9601, "step": 6266, "task_loss": 0.8885012269020081 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.8676465749740601, "epoch": 5.3, "learning_rate": 2.6124729970883815e-05, "loss": 1.0845, "step": 6267, "task_loss": 0.6014307141304016 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.3205220699310303, "epoch": 5.3, "learning_rate": 2.6120033812341504e-05, "loss": 1.1579, "step": 6268, "task_loss": 1.1648128032684326 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.163299322128296, "epoch": 5.3, "learning_rate": 2.6115337653799194e-05, "loss": 1.0619, "step": 6269, "task_loss": 1.0477020740509033 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.2350797653198242, "epoch": 5.3, "learning_rate": 2.6110641495256884e-05, "loss": 1.1997, "step": 6270, "task_loss": 0.47865501046180725 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.043689489364624, "epoch": 5.3, "learning_rate": 2.610594533671457e-05, "loss": 1.2971, "step": 6271, "task_loss": 1.0475184917449951 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.2485613822937012, "epoch": 5.3, "learning_rate": 2.6101249178172253e-05, "loss": 1.2204, "step": 6272, "task_loss": 1.0651555061340332 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.8915637731552124, "epoch": 5.3, "learning_rate": 2.6096553019629943e-05, "loss": 1.065, "step": 6273, "task_loss": 0.740326464176178 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7085151672363281, "epoch": 5.3, "learning_rate": 2.6091856861087632e-05, "loss": 0.7562, "step": 6274, "task_loss": 0.9056514501571655 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.2573350667953491, "epoch": 5.3, "learning_rate": 2.6087160702545322e-05, "loss": 1.1908, "step": 6275, "task_loss": 0.3315636217594147 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.064650535583496, "epoch": 5.3, "learning_rate": 2.6082464544003005e-05, "loss": 0.9445, "step": 6276, "task_loss": 0.601737380027771 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.6807420253753662, "epoch": 5.31, "learning_rate": 2.6077768385460695e-05, "loss": 1.2413, "step": 6277, "task_loss": 1.6614559888839722 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.8984200358390808, "epoch": 5.31, "learning_rate": 2.607307222691838e-05, "loss": 1.0405, "step": 6278, "task_loss": 0.4443584382534027 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7277846932411194, "epoch": 5.31, "learning_rate": 2.606837606837607e-05, "loss": 0.8638, "step": 6279, "task_loss": 0.7855465412139893 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.2097464799880981, "epoch": 5.31, "learning_rate": 2.6063679909833754e-05, "loss": 1.0682, "step": 6280, "task_loss": 1.6008341312408447 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.4900636672973633, "epoch": 5.31, "learning_rate": 2.6058983751291444e-05, "loss": 1.0517, "step": 6281, "task_loss": 1.4388266801834106 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4992321729660034, "epoch": 5.31, "learning_rate": 2.6054287592749133e-05, "loss": 1.1086, "step": 6282, "task_loss": 0.4883953630924225 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7810841798782349, "epoch": 5.31, "learning_rate": 2.6049591434206823e-05, "loss": 0.8893, "step": 6283, "task_loss": 0.8573915958404541 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.777472972869873, "epoch": 5.31, "learning_rate": 2.604489527566451e-05, "loss": 1.0008, "step": 6284, "task_loss": 1.1877861022949219 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.9558157920837402, "epoch": 5.31, "learning_rate": 2.6040199117122192e-05, "loss": 1.2275, "step": 6285, "task_loss": 2.007687568664551 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.8540509939193726, "epoch": 5.31, "learning_rate": 2.6035502958579882e-05, "loss": 1.1215, "step": 6286, "task_loss": 0.7785129547119141 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.2505308389663696, "epoch": 5.31, "learning_rate": 2.6030806800037572e-05, "loss": 1.0324, "step": 6287, "task_loss": 0.9966123104095459 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.4795042276382446, "epoch": 5.32, "learning_rate": 2.602611064149526e-05, "loss": 1.2354, "step": 6288, "task_loss": 1.2905465364456177 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5502089262008667, "epoch": 5.32, "learning_rate": 2.6021414482952944e-05, "loss": 0.8555, "step": 6289, "task_loss": 0.7582305669784546 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.451699137687683, "epoch": 5.32, "learning_rate": 2.6016718324410634e-05, "loss": 1.2322, "step": 6290, "task_loss": 1.3822592496871948 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.121490478515625, "epoch": 5.32, "learning_rate": 2.601202216586832e-05, "loss": 1.1477, "step": 6291, "task_loss": 0.813787579536438 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.9492838382720947, "epoch": 5.32, "learning_rate": 2.600732600732601e-05, "loss": 0.8821, "step": 6292, "task_loss": 1.889599084854126 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.1156110763549805, "epoch": 5.32, "learning_rate": 2.6002629848783693e-05, "loss": 0.8706, "step": 6293, "task_loss": 1.081559419631958 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.3446491956710815, "epoch": 5.32, "learning_rate": 2.5997933690241383e-05, "loss": 1.2839, "step": 6294, "task_loss": 0.49927690625190735 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.3964911699295044, "epoch": 5.32, "learning_rate": 2.5993237531699073e-05, "loss": 1.1423, "step": 6295, "task_loss": 0.885152280330658 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.2084403038024902, "epoch": 5.32, "learning_rate": 2.598854137315676e-05, "loss": 1.2049, "step": 6296, "task_loss": 1.9564460515975952 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.0270271301269531, "epoch": 5.32, "learning_rate": 2.5983845214614445e-05, "loss": 1.0406, "step": 6297, "task_loss": 0.771664559841156 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.8043367862701416, "epoch": 5.32, "learning_rate": 2.5979149056072132e-05, "loss": 1.2423, "step": 6298, "task_loss": 0.32122018933296204 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 2.3620638847351074, "epoch": 5.32, "learning_rate": 2.597445289752982e-05, "loss": 1.5326, "step": 6299, "task_loss": 2.170806884765625 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.071317195892334, "epoch": 5.33, "learning_rate": 2.596975673898751e-05, "loss": 1.1026, "step": 6300, "task_loss": 0.9808135628700256 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.0573582649230957, "epoch": 5.33, "learning_rate": 2.59650605804452e-05, "loss": 1.0526, "step": 6301, "task_loss": 2.1081016063690186 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.929741382598877, "epoch": 5.33, "learning_rate": 2.5960364421902884e-05, "loss": 1.2154, "step": 6302, "task_loss": 1.0205265283584595 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.9894193410873413, "epoch": 5.33, "learning_rate": 2.5955668263360574e-05, "loss": 0.9829, "step": 6303, "task_loss": 1.2885124683380127 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.090564250946045, "epoch": 5.33, "learning_rate": 2.595097210481826e-05, "loss": 1.1781, "step": 6304, "task_loss": 1.379763126373291 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.3314253091812134, "epoch": 5.33, "learning_rate": 2.594627594627595e-05, "loss": 1.1251, "step": 6305, "task_loss": 1.8114311695098877 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.8091065883636475, "epoch": 5.33, "learning_rate": 2.5941579787733633e-05, "loss": 1.132, "step": 6306, "task_loss": 1.0272351503372192 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.9011757373809814, "epoch": 5.33, "learning_rate": 2.5936883629191322e-05, "loss": 1.0107, "step": 6307, "task_loss": 0.8120344877243042 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.2125520706176758, "epoch": 5.33, "learning_rate": 2.5932187470649012e-05, "loss": 1.2476, "step": 6308, "task_loss": 0.8350540399551392 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.2465773820877075, "epoch": 5.33, "learning_rate": 2.59274913121067e-05, "loss": 1.2381, "step": 6309, "task_loss": 0.7249597907066345 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4670814275741577, "epoch": 5.33, "learning_rate": 2.5922795153564385e-05, "loss": 1.2253, "step": 6310, "task_loss": 0.03881332278251648 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.893643856048584, "epoch": 5.33, "learning_rate": 2.591809899502207e-05, "loss": 1.3572, "step": 6311, "task_loss": 0.8605049252510071 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6161984801292419, "epoch": 5.34, "learning_rate": 2.591340283647976e-05, "loss": 0.7624, "step": 6312, "task_loss": 0.1707698553800583 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 2.6236913204193115, "epoch": 5.34, "learning_rate": 2.590870667793745e-05, "loss": 1.363, "step": 6313, "task_loss": 2.073456287384033 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.2565042972564697, "epoch": 5.34, "learning_rate": 2.590401051939514e-05, "loss": 0.9292, "step": 6314, "task_loss": 0.464331716299057 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.8568236827850342, "epoch": 5.34, "learning_rate": 2.5899314360852823e-05, "loss": 0.917, "step": 6315, "task_loss": 1.047317624092102 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.9772505164146423, "epoch": 5.34, "learning_rate": 2.589461820231051e-05, "loss": 0.9018, "step": 6316, "task_loss": 0.35335761308670044 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 2.4276371002197266, "epoch": 5.34, "learning_rate": 2.58899220437682e-05, "loss": 1.3128, "step": 6317, "task_loss": 2.0015709400177 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.8495938181877136, "epoch": 5.34, "learning_rate": 2.588522588522589e-05, "loss": 0.9544, "step": 6318, "task_loss": 1.1158232688903809 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.2351930141448975, "epoch": 5.34, "learning_rate": 2.5880529726683572e-05, "loss": 1.1339, "step": 6319, "task_loss": 1.9154291152954102 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.8957180976867676, "epoch": 5.34, "learning_rate": 2.587583356814126e-05, "loss": 0.8173, "step": 6320, "task_loss": 1.326109766960144 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.6073119640350342, "epoch": 5.34, "learning_rate": 2.587113740959895e-05, "loss": 1.0063, "step": 6321, "task_loss": 1.1907532215118408 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7874072790145874, "epoch": 5.34, "learning_rate": 2.5866441251056638e-05, "loss": 0.9163, "step": 6322, "task_loss": 0.5948822498321533 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.0012092590332031, "epoch": 5.34, "learning_rate": 2.586174509251432e-05, "loss": 0.8907, "step": 6323, "task_loss": 0.5844492316246033 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7369522452354431, "epoch": 5.35, "learning_rate": 2.585704893397201e-05, "loss": 0.8512, "step": 6324, "task_loss": 1.0217374563217163 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.5832953453063965, "epoch": 5.35, "learning_rate": 2.58523527754297e-05, "loss": 1.066, "step": 6325, "task_loss": 1.427672266960144 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.9957594275474548, "epoch": 5.35, "learning_rate": 2.584765661688739e-05, "loss": 0.8375, "step": 6326, "task_loss": 0.669663667678833 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.1842681169509888, "epoch": 5.35, "learning_rate": 2.5842960458345073e-05, "loss": 0.9617, "step": 6327, "task_loss": 1.2276474237442017 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.0594830513000488, "epoch": 5.35, "learning_rate": 2.5838264299802763e-05, "loss": 1.1535, "step": 6328, "task_loss": 0.8553057909011841 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.0487747192382812, "epoch": 5.35, "learning_rate": 2.583356814126045e-05, "loss": 1.093, "step": 6329, "task_loss": 1.2517271041870117 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.0591256618499756, "epoch": 5.35, "learning_rate": 2.582887198271814e-05, "loss": 1.0196, "step": 6330, "task_loss": 1.2961130142211914 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.9135128259658813, "epoch": 5.35, "learning_rate": 2.582417582417583e-05, "loss": 1.1901, "step": 6331, "task_loss": 1.2081958055496216 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.0386688709259033, "epoch": 5.35, "learning_rate": 2.581947966563351e-05, "loss": 0.8925, "step": 6332, "task_loss": 1.1858305931091309 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.9114164710044861, "epoch": 5.35, "learning_rate": 2.58147835070912e-05, "loss": 0.8306, "step": 6333, "task_loss": 0.5890434980392456 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6430786848068237, "epoch": 5.35, "learning_rate": 2.581008734854889e-05, "loss": 0.9828, "step": 6334, "task_loss": 0.5364454388618469 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.4051508903503418, "epoch": 5.35, "learning_rate": 2.5805391190006577e-05, "loss": 1.0941, "step": 6335, "task_loss": 1.1373642683029175 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.2859364748001099, "epoch": 5.36, "learning_rate": 2.580069503146426e-05, "loss": 1.2581, "step": 6336, "task_loss": 0.8994808793067932 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.506266713142395, "epoch": 5.36, "learning_rate": 2.579599887292195e-05, "loss": 1.2173, "step": 6337, "task_loss": 0.8273641467094421 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6704078316688538, "epoch": 5.36, "learning_rate": 2.579130271437964e-05, "loss": 0.9506, "step": 6338, "task_loss": 0.5349453687667847 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.2382078170776367, "epoch": 5.36, "learning_rate": 2.578660655583733e-05, "loss": 0.9746, "step": 6339, "task_loss": 1.844632625579834 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.354675054550171, "epoch": 5.36, "learning_rate": 2.5781910397295012e-05, "loss": 1.1253, "step": 6340, "task_loss": 0.5944096446037292 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.9370590448379517, "epoch": 5.36, "learning_rate": 2.5777214238752702e-05, "loss": 0.7803, "step": 6341, "task_loss": 1.3131383657455444 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.9885606169700623, "epoch": 5.36, "learning_rate": 2.5772518080210388e-05, "loss": 1.1081, "step": 6342, "task_loss": 0.886330783367157 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5954943895339966, "epoch": 5.36, "learning_rate": 2.5767821921668078e-05, "loss": 0.8166, "step": 6343, "task_loss": 0.5659304857254028 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7303256988525391, "epoch": 5.36, "learning_rate": 2.576312576312576e-05, "loss": 1.0771, "step": 6344, "task_loss": 1.3235678672790527 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.1352696418762207, "epoch": 5.36, "learning_rate": 2.575842960458345e-05, "loss": 1.1772, "step": 6345, "task_loss": 0.709708571434021 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6683835983276367, "epoch": 5.36, "learning_rate": 2.575373344604114e-05, "loss": 0.796, "step": 6346, "task_loss": 0.7141897082328796 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.4479426145553589, "epoch": 5.36, "learning_rate": 2.5749037287498827e-05, "loss": 1.202, "step": 6347, "task_loss": 1.5755990743637085 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.9838846921920776, "epoch": 5.37, "learning_rate": 2.5744341128956516e-05, "loss": 1.3168, "step": 6348, "task_loss": 1.615256428718567 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.03053879737854, "epoch": 5.37, "learning_rate": 2.57396449704142e-05, "loss": 1.0314, "step": 6349, "task_loss": 0.6874434351921082 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 2.283419609069824, "epoch": 5.37, "learning_rate": 2.573494881187189e-05, "loss": 1.567, "step": 6350, "task_loss": 2.063002824783325 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.8604526519775391, "epoch": 5.37, "learning_rate": 2.573025265332958e-05, "loss": 0.9037, "step": 6351, "task_loss": 0.9422600269317627 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.3392837047576904, "epoch": 5.37, "learning_rate": 2.572555649478727e-05, "loss": 1.0315, "step": 6352, "task_loss": 1.213079810142517 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.4964888095855713, "epoch": 5.37, "learning_rate": 2.572086033624495e-05, "loss": 0.9462, "step": 6353, "task_loss": 1.4518071413040161 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5437741279602051, "epoch": 5.37, "learning_rate": 2.571616417770264e-05, "loss": 1.0782, "step": 6354, "task_loss": 0.9972289800643921 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.3377244472503662, "epoch": 5.37, "learning_rate": 2.5711468019160328e-05, "loss": 1.2693, "step": 6355, "task_loss": 1.8939356803894043 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.9605881571769714, "epoch": 5.37, "learning_rate": 2.5706771860618017e-05, "loss": 0.9315, "step": 6356, "task_loss": 0.6263020634651184 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.9925759434700012, "epoch": 5.37, "learning_rate": 2.57020757020757e-05, "loss": 0.9375, "step": 6357, "task_loss": 0.5074746012687683 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.2417348623275757, "epoch": 5.37, "learning_rate": 2.569737954353339e-05, "loss": 1.2393, "step": 6358, "task_loss": 0.947325587272644 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.5062440633773804, "epoch": 5.38, "learning_rate": 2.569268338499108e-05, "loss": 0.9885, "step": 6359, "task_loss": 1.7746906280517578 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.0098721981048584, "epoch": 5.38, "learning_rate": 2.5687987226448766e-05, "loss": 1.1323, "step": 6360, "task_loss": 2.448911190032959 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.3488571643829346, "epoch": 5.38, "learning_rate": 2.5683291067906456e-05, "loss": 1.068, "step": 6361, "task_loss": 1.1848984956741333 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.2297766208648682, "epoch": 5.38, "learning_rate": 2.567859490936414e-05, "loss": 1.0782, "step": 6362, "task_loss": 1.1673896312713623 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7162044644355774, "epoch": 5.38, "learning_rate": 2.567389875082183e-05, "loss": 0.939, "step": 6363, "task_loss": 0.730419933795929 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.8928181529045105, "epoch": 5.38, "learning_rate": 2.5669202592279518e-05, "loss": 1.2395, "step": 6364, "task_loss": 0.8238903284072876 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.8681780695915222, "epoch": 5.38, "learning_rate": 2.5664506433737208e-05, "loss": 1.0558, "step": 6365, "task_loss": 0.6736039519309998 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.8249871134757996, "epoch": 5.38, "learning_rate": 2.565981027519489e-05, "loss": 1.0272, "step": 6366, "task_loss": 0.7399210333824158 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.1848400831222534, "epoch": 5.38, "learning_rate": 2.5655114116652577e-05, "loss": 1.08, "step": 6367, "task_loss": 1.5130903720855713 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7539374232292175, "epoch": 5.38, "learning_rate": 2.5650417958110267e-05, "loss": 1.0892, "step": 6368, "task_loss": 0.2544321119785309 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.1642818450927734, "epoch": 5.38, "learning_rate": 2.5645721799567957e-05, "loss": 1.0631, "step": 6369, "task_loss": 1.002232551574707 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.8273664712905884, "epoch": 5.38, "learning_rate": 2.564102564102564e-05, "loss": 0.8958, "step": 6370, "task_loss": 0.3885570168495178 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.3125488758087158, "epoch": 5.39, "learning_rate": 2.563632948248333e-05, "loss": 1.0597, "step": 6371, "task_loss": 1.3921027183532715 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.8883625864982605, "epoch": 5.39, "learning_rate": 2.563163332394102e-05, "loss": 1.0651, "step": 6372, "task_loss": 0.6870517134666443 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7650696039199829, "epoch": 5.39, "learning_rate": 2.5626937165398705e-05, "loss": 0.7068, "step": 6373, "task_loss": 0.3232174217700958 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.5412269830703735, "epoch": 5.39, "learning_rate": 2.562224100685639e-05, "loss": 1.3732, "step": 6374, "task_loss": 1.5598081350326538 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.8687403798103333, "epoch": 5.39, "learning_rate": 2.5617544848314078e-05, "loss": 1.1884, "step": 6375, "task_loss": 0.5333778262138367 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.9229332208633423, "epoch": 5.39, "learning_rate": 2.5612848689771768e-05, "loss": 0.9433, "step": 6376, "task_loss": 0.9237564206123352 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.8227337002754211, "epoch": 5.39, "learning_rate": 2.5608152531229458e-05, "loss": 1.0356, "step": 6377, "task_loss": 0.581084668636322 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7740417718887329, "epoch": 5.39, "learning_rate": 2.5603456372687147e-05, "loss": 0.9609, "step": 6378, "task_loss": 0.7768348455429077 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.7141458988189697, "epoch": 5.39, "learning_rate": 2.559876021414483e-05, "loss": 1.1227, "step": 6379, "task_loss": 1.545905351638794 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.1679857969284058, "epoch": 5.39, "learning_rate": 2.5594064055602517e-05, "loss": 1.1004, "step": 6380, "task_loss": 1.456630825996399 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.066922664642334, "epoch": 5.39, "learning_rate": 2.5589367897060206e-05, "loss": 0.9414, "step": 6381, "task_loss": 1.155923843383789 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.3232605457305908, "epoch": 5.39, "learning_rate": 2.5584671738517896e-05, "loss": 1.1557, "step": 6382, "task_loss": 0.5759899020195007 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.7464649677276611, "epoch": 5.4, "learning_rate": 2.557997557997558e-05, "loss": 1.0929, "step": 6383, "task_loss": 1.3400397300720215 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6620190143585205, "epoch": 5.4, "learning_rate": 2.557527942143327e-05, "loss": 0.9794, "step": 6384, "task_loss": 1.5556213855743408 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.8300684690475464, "epoch": 5.4, "learning_rate": 2.557058326289096e-05, "loss": 1.051, "step": 6385, "task_loss": 1.3163827657699585 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.0519795417785645, "epoch": 5.4, "learning_rate": 2.5565887104348645e-05, "loss": 0.9844, "step": 6386, "task_loss": 1.060746431350708 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.8546610474586487, "epoch": 5.4, "learning_rate": 2.5561190945806328e-05, "loss": 1.3334, "step": 6387, "task_loss": 0.619842529296875 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.1876801252365112, "epoch": 5.4, "learning_rate": 2.5556494787264017e-05, "loss": 1.092, "step": 6388, "task_loss": 0.8291508555412292 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.3396753966808319, "epoch": 5.4, "learning_rate": 2.5551798628721707e-05, "loss": 0.6644, "step": 6389, "task_loss": 0.029675286263227463 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.0201027393341064, "epoch": 5.4, "learning_rate": 2.5547102470179397e-05, "loss": 1.0454, "step": 6390, "task_loss": 1.4074045419692993 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.4152026176452637, "epoch": 5.4, "learning_rate": 2.5542406311637083e-05, "loss": 1.1861, "step": 6391, "task_loss": 1.4105784893035889 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.0058245658874512, "epoch": 5.4, "learning_rate": 2.553771015309477e-05, "loss": 0.9379, "step": 6392, "task_loss": 0.800417423248291 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.9238878488540649, "epoch": 5.4, "learning_rate": 2.5533013994552456e-05, "loss": 1.1094, "step": 6393, "task_loss": 0.30619028210639954 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.3001458644866943, "epoch": 5.4, "learning_rate": 2.5528317836010146e-05, "loss": 1.2588, "step": 6394, "task_loss": 0.840109646320343 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.0764744281768799, "epoch": 5.41, "learning_rate": 2.5523621677467835e-05, "loss": 0.9288, "step": 6395, "task_loss": 1.2149934768676758 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6389381885528564, "epoch": 5.41, "learning_rate": 2.551892551892552e-05, "loss": 0.8799, "step": 6396, "task_loss": 0.4354816675186157 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.0098598003387451, "epoch": 5.41, "learning_rate": 2.5514229360383208e-05, "loss": 1.2358, "step": 6397, "task_loss": 1.1145069599151611 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.0959441661834717, "epoch": 5.41, "learning_rate": 2.5509533201840898e-05, "loss": 1.1208, "step": 6398, "task_loss": 0.48302799463272095 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.240706443786621, "epoch": 5.41, "learning_rate": 2.5504837043298584e-05, "loss": 1.3133, "step": 6399, "task_loss": 1.481557011604309 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.9447590708732605, "epoch": 5.41, "learning_rate": 2.5500140884756267e-05, "loss": 1.0729, "step": 6400, "task_loss": 1.2816162109375 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.49677690863609314, "epoch": 5.41, "learning_rate": 2.5495444726213957e-05, "loss": 0.8362, "step": 6401, "task_loss": 0.8680558800697327 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.7769114971160889, "epoch": 5.41, "learning_rate": 2.5490748567671647e-05, "loss": 1.2789, "step": 6402, "task_loss": 1.5310560464859009 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.8306081295013428, "epoch": 5.41, "learning_rate": 2.5486052409129336e-05, "loss": 0.8416, "step": 6403, "task_loss": 1.3402783870697021 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.0188164710998535, "epoch": 5.41, "learning_rate": 2.548135625058702e-05, "loss": 0.9534, "step": 6404, "task_loss": 0.9778039455413818 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6441875696182251, "epoch": 5.41, "learning_rate": 2.547666009204471e-05, "loss": 0.9517, "step": 6405, "task_loss": 0.8688264489173889 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.087812066078186, "epoch": 5.41, "learning_rate": 2.5471963933502395e-05, "loss": 0.885, "step": 6406, "task_loss": 1.3358337879180908 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7328872680664062, "epoch": 5.42, "learning_rate": 2.5467267774960085e-05, "loss": 0.7827, "step": 6407, "task_loss": 0.5390071868896484 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6655393242835999, "epoch": 5.42, "learning_rate": 2.5462571616417775e-05, "loss": 0.7414, "step": 6408, "task_loss": 0.8244444727897644 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.172795295715332, "epoch": 5.42, "learning_rate": 2.5457875457875458e-05, "loss": 0.8842, "step": 6409, "task_loss": 0.9633015990257263 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.4870212078094482, "epoch": 5.42, "learning_rate": 2.5453179299333147e-05, "loss": 1.1255, "step": 6410, "task_loss": 1.8594094514846802 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.5139474868774414, "epoch": 5.42, "learning_rate": 2.5448483140790834e-05, "loss": 0.9578, "step": 6411, "task_loss": 1.861824631690979 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.4425194263458252, "epoch": 5.42, "learning_rate": 2.5443786982248524e-05, "loss": 1.3059, "step": 6412, "task_loss": 1.045417070388794 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.9047155380249023, "epoch": 5.42, "learning_rate": 2.5439090823706206e-05, "loss": 0.7934, "step": 6413, "task_loss": 1.5678315162658691 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.4728127717971802, "epoch": 5.42, "learning_rate": 2.5434394665163896e-05, "loss": 1.0935, "step": 6414, "task_loss": 2.218679904937744 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.1783052682876587, "epoch": 5.42, "learning_rate": 2.5429698506621586e-05, "loss": 1.039, "step": 6415, "task_loss": 0.8370128273963928 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.5235366821289062, "epoch": 5.42, "learning_rate": 2.5425002348079276e-05, "loss": 1.0823, "step": 6416, "task_loss": 1.5622249841690063 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.6198564767837524, "epoch": 5.42, "learning_rate": 2.542030618953696e-05, "loss": 1.3681, "step": 6417, "task_loss": 2.1341352462768555 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.9066821336746216, "epoch": 5.42, "learning_rate": 2.5415610030994645e-05, "loss": 1.0496, "step": 6418, "task_loss": 0.7548201084136963 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.037347674369812, "epoch": 5.43, "learning_rate": 2.5410913872452335e-05, "loss": 1.086, "step": 6419, "task_loss": 1.3194986581802368 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.062352180480957, "epoch": 5.43, "learning_rate": 2.5406217713910024e-05, "loss": 1.201, "step": 6420, "task_loss": 0.6786300539970398 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.9179903268814087, "epoch": 5.43, "learning_rate": 2.5401521555367707e-05, "loss": 0.8355, "step": 6421, "task_loss": 1.4967608451843262 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.0355240106582642, "epoch": 5.43, "learning_rate": 2.5396825396825397e-05, "loss": 0.8997, "step": 6422, "task_loss": 0.616295337677002 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.0410408973693848, "epoch": 5.43, "learning_rate": 2.5392129238283087e-05, "loss": 1.0801, "step": 6423, "task_loss": 0.7140810489654541 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6238728165626526, "epoch": 5.43, "learning_rate": 2.5387433079740773e-05, "loss": 0.8609, "step": 6424, "task_loss": 0.3634946942329407 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5095426440238953, "epoch": 5.43, "learning_rate": 2.5382736921198463e-05, "loss": 0.8151, "step": 6425, "task_loss": 0.16483628749847412 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.0267832279205322, "epoch": 5.43, "learning_rate": 2.5378040762656146e-05, "loss": 0.8936, "step": 6426, "task_loss": 0.7907918691635132 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.3954848051071167, "epoch": 5.43, "learning_rate": 2.5373344604113836e-05, "loss": 1.0977, "step": 6427, "task_loss": 1.5698412656784058 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.9310122728347778, "epoch": 5.43, "learning_rate": 2.5368648445571525e-05, "loss": 0.8873, "step": 6428, "task_loss": 0.8901378512382507 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.1874616146087646, "epoch": 5.43, "learning_rate": 2.5363952287029215e-05, "loss": 1.1701, "step": 6429, "task_loss": 0.5387741327285767 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7063838243484497, "epoch": 5.44, "learning_rate": 2.5359256128486898e-05, "loss": 0.7329, "step": 6430, "task_loss": 0.17581811547279358 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.0568475723266602, "epoch": 5.44, "learning_rate": 2.5354559969944584e-05, "loss": 1.2625, "step": 6431, "task_loss": 0.5060216188430786 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6904717087745667, "epoch": 5.44, "learning_rate": 2.5349863811402274e-05, "loss": 0.7078, "step": 6432, "task_loss": 0.405549019575119 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.9340455532073975, "epoch": 5.44, "learning_rate": 2.5345167652859964e-05, "loss": 1.2024, "step": 6433, "task_loss": 1.4663316011428833 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.0381760597229004, "epoch": 5.44, "learning_rate": 2.5340471494317647e-05, "loss": 1.2214, "step": 6434, "task_loss": 1.3641736507415771 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.1436653137207031, "epoch": 5.44, "learning_rate": 2.5335775335775336e-05, "loss": 0.8764, "step": 6435, "task_loss": 0.855593740940094 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.9543707370758057, "epoch": 5.44, "learning_rate": 2.5331079177233026e-05, "loss": 0.7474, "step": 6436, "task_loss": 0.2517108917236328 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6863232851028442, "epoch": 5.44, "learning_rate": 2.5326383018690712e-05, "loss": 1.1251, "step": 6437, "task_loss": 1.0124092102050781 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6661593914031982, "epoch": 5.44, "learning_rate": 2.5321686860148402e-05, "loss": 0.7665, "step": 6438, "task_loss": 0.5492129325866699 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6640273928642273, "epoch": 5.44, "learning_rate": 2.5316990701606085e-05, "loss": 0.7951, "step": 6439, "task_loss": 0.9891870021820068 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.8081628084182739, "epoch": 5.44, "learning_rate": 2.5312294543063775e-05, "loss": 0.9379, "step": 6440, "task_loss": 1.1161000728607178 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.2484506368637085, "epoch": 5.44, "learning_rate": 2.5307598384521465e-05, "loss": 1.0055, "step": 6441, "task_loss": 1.342250943183899 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.0063486099243164, "epoch": 5.45, "learning_rate": 2.530290222597915e-05, "loss": 1.1021, "step": 6442, "task_loss": 1.7969468832015991 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.9339218139648438, "epoch": 5.45, "learning_rate": 2.5298206067436837e-05, "loss": 0.8242, "step": 6443, "task_loss": 1.0983366966247559 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 2.0517544746398926, "epoch": 5.45, "learning_rate": 2.5293509908894524e-05, "loss": 1.0788, "step": 6444, "task_loss": 1.6302683353424072 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.5570580959320068, "epoch": 5.45, "learning_rate": 2.5288813750352213e-05, "loss": 1.2728, "step": 6445, "task_loss": 0.9016123414039612 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 2.042505979537964, "epoch": 5.45, "learning_rate": 2.5284117591809903e-05, "loss": 1.3635, "step": 6446, "task_loss": 2.062483549118042 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.9029207229614258, "epoch": 5.45, "learning_rate": 2.5279421433267586e-05, "loss": 0.9055, "step": 6447, "task_loss": 0.47686198353767395 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.1344373226165771, "epoch": 5.45, "learning_rate": 2.5274725274725276e-05, "loss": 1.0683, "step": 6448, "task_loss": 1.6097692251205444 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.5122966766357422, "epoch": 5.45, "learning_rate": 2.5270029116182966e-05, "loss": 1.0566, "step": 6449, "task_loss": 1.5595813989639282 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 2.3242008686065674, "epoch": 5.45, "learning_rate": 2.5265332957640652e-05, "loss": 1.1961, "step": 6450, "task_loss": 0.9449503421783447 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.2608683109283447, "epoch": 5.45, "learning_rate": 2.5260636799098335e-05, "loss": 0.9495, "step": 6451, "task_loss": 1.287846565246582 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7174100875854492, "epoch": 5.45, "learning_rate": 2.5255940640556025e-05, "loss": 1.059, "step": 6452, "task_loss": 0.5379125475883484 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6162874698638916, "epoch": 5.45, "learning_rate": 2.5251244482013714e-05, "loss": 0.9009, "step": 6453, "task_loss": 1.0124763250350952 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.2456603050231934, "epoch": 5.46, "learning_rate": 2.5246548323471404e-05, "loss": 1.2965, "step": 6454, "task_loss": 1.1493887901306152 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.9956827163696289, "epoch": 5.46, "learning_rate": 2.524185216492909e-05, "loss": 0.9841, "step": 6455, "task_loss": 0.7108720541000366 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.8716190457344055, "epoch": 5.46, "learning_rate": 2.5237156006386777e-05, "loss": 1.0453, "step": 6456, "task_loss": 0.8884397149085999 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6005128622055054, "epoch": 5.46, "learning_rate": 2.5232459847844463e-05, "loss": 0.8864, "step": 6457, "task_loss": 0.6114558577537537 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.0184158086776733, "epoch": 5.46, "learning_rate": 2.5227763689302153e-05, "loss": 0.8465, "step": 6458, "task_loss": 0.522857129573822 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.0012152194976807, "epoch": 5.46, "learning_rate": 2.5223067530759842e-05, "loss": 0.8736, "step": 6459, "task_loss": 0.3753816485404968 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.572245478630066, "epoch": 5.46, "learning_rate": 2.5218371372217525e-05, "loss": 1.1394, "step": 6460, "task_loss": 1.2705318927764893 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.3130083084106445, "epoch": 5.46, "learning_rate": 2.5213675213675215e-05, "loss": 1.0463, "step": 6461, "task_loss": 1.9780476093292236 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.9595435857772827, "epoch": 5.46, "learning_rate": 2.52089790551329e-05, "loss": 0.7901, "step": 6462, "task_loss": 0.6375880241394043 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6294265985488892, "epoch": 5.46, "learning_rate": 2.520428289659059e-05, "loss": 0.9821, "step": 6463, "task_loss": 0.4364987313747406 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.9522368311882019, "epoch": 5.46, "learning_rate": 2.5199586738048274e-05, "loss": 0.8825, "step": 6464, "task_loss": 0.6067593097686768 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.031280517578125, "epoch": 5.46, "learning_rate": 2.5194890579505964e-05, "loss": 1.0753, "step": 6465, "task_loss": 2.067537784576416 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.9081988334655762, "epoch": 5.47, "learning_rate": 2.5190194420963654e-05, "loss": 0.9152, "step": 6466, "task_loss": 0.5675429105758667 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.1158198118209839, "epoch": 5.47, "learning_rate": 2.5185498262421343e-05, "loss": 1.0251, "step": 6467, "task_loss": 1.5512065887451172 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.5131148099899292, "epoch": 5.47, "learning_rate": 2.518080210387903e-05, "loss": 0.9735, "step": 6468, "task_loss": 0.6444335579872131 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 2.117063045501709, "epoch": 5.47, "learning_rate": 2.5176105945336713e-05, "loss": 1.369, "step": 6469, "task_loss": 2.939657688140869 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5342209339141846, "epoch": 5.47, "learning_rate": 2.5171409786794402e-05, "loss": 0.8961, "step": 6470, "task_loss": 0.44115594029426575 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.410351037979126, "epoch": 5.47, "learning_rate": 2.5166713628252092e-05, "loss": 1.0599, "step": 6471, "task_loss": 1.2014350891113281 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7803982496261597, "epoch": 5.47, "learning_rate": 2.5162017469709782e-05, "loss": 0.897, "step": 6472, "task_loss": 0.6406145095825195 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.2381980419158936, "epoch": 5.47, "learning_rate": 2.5157321311167465e-05, "loss": 1.0642, "step": 6473, "task_loss": 1.8437703847885132 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.2310930490493774, "epoch": 5.47, "learning_rate": 2.5152625152625155e-05, "loss": 1.0296, "step": 6474, "task_loss": 1.4089546203613281 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.2360575199127197, "epoch": 5.47, "learning_rate": 2.514792899408284e-05, "loss": 0.7497, "step": 6475, "task_loss": 0.6520443558692932 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6642756462097168, "epoch": 5.47, "learning_rate": 2.514323283554053e-05, "loss": 0.8133, "step": 6476, "task_loss": 1.9031150341033936 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.697030782699585, "epoch": 5.47, "learning_rate": 2.5138536676998214e-05, "loss": 1.1608, "step": 6477, "task_loss": 1.2578227519989014 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.2124602794647217, "epoch": 5.48, "learning_rate": 2.5133840518455903e-05, "loss": 1.0097, "step": 6478, "task_loss": 1.5414373874664307 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6035012006759644, "epoch": 5.48, "learning_rate": 2.5129144359913593e-05, "loss": 0.9916, "step": 6479, "task_loss": 0.37748512625694275 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.839076042175293, "epoch": 5.48, "learning_rate": 2.5124448201371283e-05, "loss": 0.8477, "step": 6480, "task_loss": 0.9630509614944458 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.9062399864196777, "epoch": 5.48, "learning_rate": 2.5119752042828966e-05, "loss": 0.7406, "step": 6481, "task_loss": 0.5637525320053101 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.426987886428833, "epoch": 5.48, "learning_rate": 2.5115055884286652e-05, "loss": 1.4049, "step": 6482, "task_loss": 1.1638402938842773 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.1536519527435303, "epoch": 5.48, "learning_rate": 2.5110359725744342e-05, "loss": 1.0041, "step": 6483, "task_loss": 0.666979193687439 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.410020112991333, "epoch": 5.48, "learning_rate": 2.510566356720203e-05, "loss": 1.0886, "step": 6484, "task_loss": 0.9850988388061523 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.0810198783874512, "epoch": 5.48, "learning_rate": 2.510096740865972e-05, "loss": 1.1591, "step": 6485, "task_loss": 1.395552635192871 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.9863530993461609, "epoch": 5.48, "learning_rate": 2.5096271250117404e-05, "loss": 1.1201, "step": 6486, "task_loss": 1.1136263608932495 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7814576029777527, "epoch": 5.48, "learning_rate": 2.5091575091575094e-05, "loss": 1.1302, "step": 6487, "task_loss": 0.8994828462600708 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.1980841159820557, "epoch": 5.48, "learning_rate": 2.508687893303278e-05, "loss": 1.0086, "step": 6488, "task_loss": 0.6037781238555908 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.0863840579986572, "epoch": 5.48, "learning_rate": 2.508218277449047e-05, "loss": 1.0447, "step": 6489, "task_loss": 0.7677891850471497 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.4626713991165161, "epoch": 5.49, "learning_rate": 2.5077486615948153e-05, "loss": 1.1299, "step": 6490, "task_loss": 1.5354024171829224 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.8204333782196045, "epoch": 5.49, "learning_rate": 2.5072790457405843e-05, "loss": 1.0671, "step": 6491, "task_loss": 1.9802935123443604 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.0774654150009155, "epoch": 5.49, "learning_rate": 2.5068094298863532e-05, "loss": 0.7544, "step": 6492, "task_loss": 0.3735019564628601 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.3140161037445068, "epoch": 5.49, "learning_rate": 2.506339814032122e-05, "loss": 1.0066, "step": 6493, "task_loss": 1.0784987211227417 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.0194470882415771, "epoch": 5.49, "learning_rate": 2.5058701981778905e-05, "loss": 0.8462, "step": 6494, "task_loss": 0.5586204528808594 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6462347507476807, "epoch": 5.49, "learning_rate": 2.505400582323659e-05, "loss": 1.207, "step": 6495, "task_loss": 0.7948214411735535 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.2999684810638428, "epoch": 5.49, "learning_rate": 2.504930966469428e-05, "loss": 1.0452, "step": 6496, "task_loss": 1.4566434621810913 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.2516119480133057, "epoch": 5.49, "learning_rate": 2.504461350615197e-05, "loss": 0.9019, "step": 6497, "task_loss": 1.380259394645691 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.8307926654815674, "epoch": 5.49, "learning_rate": 2.5039917347609654e-05, "loss": 0.8174, "step": 6498, "task_loss": 0.9373601078987122 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.0203444957733154, "epoch": 5.49, "learning_rate": 2.5035221189067343e-05, "loss": 0.7014, "step": 6499, "task_loss": 0.8968945741653442 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.3474643230438232, "epoch": 5.49, "learning_rate": 2.5030525030525033e-05, "loss": 1.0668, "step": 6500, "task_loss": 0.5559388399124146 }, { "epoch": 5.49, "eval_accuracy": 0.873980198019802, "eval_loss": 0.605010449886322, "eval_runtime": 225.7443, "eval_samples_per_second": 111.852, "eval_steps_per_second": 0.877, "step": 6500 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6095402836799622, "epoch": 5.5, "learning_rate": 2.502582887198272e-05, "loss": 0.802, "step": 6501, "task_loss": 0.6356408596038818 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7141250967979431, "epoch": 5.5, "learning_rate": 2.502113271344041e-05, "loss": 0.8824, "step": 6502, "task_loss": 0.9097650051116943 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.8958068490028381, "epoch": 5.5, "learning_rate": 2.5016436554898092e-05, "loss": 1.0119, "step": 6503, "task_loss": 0.6776978969573975 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.398125410079956, "epoch": 5.5, "learning_rate": 2.5011740396355782e-05, "loss": 1.1917, "step": 6504, "task_loss": 1.3158059120178223 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.4891916513442993, "epoch": 5.5, "learning_rate": 2.500704423781347e-05, "loss": 0.852, "step": 6505, "task_loss": 0.9764769077301025 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.1734192371368408, "epoch": 5.5, "learning_rate": 2.5002348079271158e-05, "loss": 1.0656, "step": 6506, "task_loss": 1.528217077255249 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.8117177486419678, "epoch": 5.5, "learning_rate": 2.4997651920728844e-05, "loss": 0.9229, "step": 6507, "task_loss": 1.1204307079315186 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7633861899375916, "epoch": 5.5, "learning_rate": 2.499295576218653e-05, "loss": 0.9314, "step": 6508, "task_loss": 0.5120180249214172 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.0024962425231934, "epoch": 5.5, "learning_rate": 2.498825960364422e-05, "loss": 1.0553, "step": 6509, "task_loss": 0.9422429203987122 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6643247604370117, "epoch": 5.5, "learning_rate": 2.4983563445101907e-05, "loss": 0.8235, "step": 6510, "task_loss": 0.928342342376709 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.6740977764129639, "epoch": 5.5, "learning_rate": 2.4978867286559597e-05, "loss": 1.2143, "step": 6511, "task_loss": 1.1002947092056274 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.2026362419128418, "epoch": 5.5, "learning_rate": 2.4974171128017283e-05, "loss": 1.0856, "step": 6512, "task_loss": 1.3967161178588867 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.417423129081726, "epoch": 5.51, "learning_rate": 2.496947496947497e-05, "loss": 0.9627, "step": 6513, "task_loss": 1.164745807647705 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.9823552370071411, "epoch": 5.51, "learning_rate": 2.4964778810932656e-05, "loss": 0.9378, "step": 6514, "task_loss": 1.4372062683105469 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.463081955909729, "epoch": 5.51, "learning_rate": 2.4960082652390345e-05, "loss": 0.8495, "step": 6515, "task_loss": 1.3777320384979248 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7449542880058289, "epoch": 5.51, "learning_rate": 2.4955386493848035e-05, "loss": 0.9332, "step": 6516, "task_loss": 0.9524458646774292 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.097550392150879, "epoch": 5.51, "learning_rate": 2.495069033530572e-05, "loss": 0.9055, "step": 6517, "task_loss": 0.8129556179046631 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.516600251197815, "epoch": 5.51, "learning_rate": 2.494599417676341e-05, "loss": 1.0558, "step": 6518, "task_loss": 1.2795346975326538 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.8112541437149048, "epoch": 5.51, "learning_rate": 2.4941298018221097e-05, "loss": 1.153, "step": 6519, "task_loss": 1.0764656066894531 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.9789175987243652, "epoch": 5.51, "learning_rate": 2.4936601859678784e-05, "loss": 0.9791, "step": 6520, "task_loss": 0.7618276476860046 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.289778232574463, "epoch": 5.51, "learning_rate": 2.493190570113647e-05, "loss": 1.1099, "step": 6521, "task_loss": 0.5469610691070557 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.9881772994995117, "epoch": 5.51, "learning_rate": 2.492720954259416e-05, "loss": 0.7864, "step": 6522, "task_loss": 1.1781964302062988 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.2679753303527832, "epoch": 5.51, "learning_rate": 2.4922513384051846e-05, "loss": 1.1556, "step": 6523, "task_loss": 1.5470633506774902 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6113263964653015, "epoch": 5.51, "learning_rate": 2.4917817225509536e-05, "loss": 0.8729, "step": 6524, "task_loss": 0.2104288637638092 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.9389169812202454, "epoch": 5.52, "learning_rate": 2.4913121066967222e-05, "loss": 0.8711, "step": 6525, "task_loss": 0.9308289289474487 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7133930325508118, "epoch": 5.52, "learning_rate": 2.490842490842491e-05, "loss": 1.1651, "step": 6526, "task_loss": 0.25973373651504517 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.632519543170929, "epoch": 5.52, "learning_rate": 2.4903728749882595e-05, "loss": 0.7514, "step": 6527, "task_loss": 0.49152323603630066 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.2376489639282227, "epoch": 5.52, "learning_rate": 2.4899032591340285e-05, "loss": 1.2604, "step": 6528, "task_loss": 1.007156252861023 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.8465752601623535, "epoch": 5.52, "learning_rate": 2.489433643279797e-05, "loss": 0.9575, "step": 6529, "task_loss": 0.8186055421829224 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.1092650890350342, "epoch": 5.52, "learning_rate": 2.488964027425566e-05, "loss": 1.1083, "step": 6530, "task_loss": 1.4498447179794312 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7407197952270508, "epoch": 5.52, "learning_rate": 2.488494411571335e-05, "loss": 0.8756, "step": 6531, "task_loss": 1.1178863048553467 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.34309059381484985, "epoch": 5.52, "learning_rate": 2.4880247957171037e-05, "loss": 0.6501, "step": 6532, "task_loss": 0.2862367331981659 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.0120652914047241, "epoch": 5.52, "learning_rate": 2.4875551798628723e-05, "loss": 1.0923, "step": 6533, "task_loss": 1.1884217262268066 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.733035147190094, "epoch": 5.52, "learning_rate": 2.487085564008641e-05, "loss": 0.9447, "step": 6534, "task_loss": 0.7153406739234924 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.3868988752365112, "epoch": 5.52, "learning_rate": 2.48661594815441e-05, "loss": 1.0083, "step": 6535, "task_loss": 2.1528139114379883 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.579030454158783, "epoch": 5.52, "learning_rate": 2.4861463323001785e-05, "loss": 1.095, "step": 6536, "task_loss": 0.6510944366455078 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.1954929828643799, "epoch": 5.53, "learning_rate": 2.4856767164459475e-05, "loss": 1.2041, "step": 6537, "task_loss": 1.4745815992355347 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.9510072469711304, "epoch": 5.53, "learning_rate": 2.485207100591716e-05, "loss": 0.9835, "step": 6538, "task_loss": 0.5887629985809326 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.1454983949661255, "epoch": 5.53, "learning_rate": 2.4847374847374848e-05, "loss": 0.9722, "step": 6539, "task_loss": 1.276520013809204 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.0835111141204834, "epoch": 5.53, "learning_rate": 2.4842678688832534e-05, "loss": 1.1358, "step": 6540, "task_loss": 0.9604179859161377 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.5456302165985107, "epoch": 5.53, "learning_rate": 2.4837982530290224e-05, "loss": 1.2501, "step": 6541, "task_loss": 1.2436788082122803 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.1599609851837158, "epoch": 5.53, "learning_rate": 2.483328637174791e-05, "loss": 0.9358, "step": 6542, "task_loss": 0.6180224418640137 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7082446813583374, "epoch": 5.53, "learning_rate": 2.48285902132056e-05, "loss": 0.9288, "step": 6543, "task_loss": 0.9486300349235535 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.8146706819534302, "epoch": 5.53, "learning_rate": 2.4823894054663286e-05, "loss": 0.9999, "step": 6544, "task_loss": 1.2737942934036255 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5037171244621277, "epoch": 5.53, "learning_rate": 2.4819197896120973e-05, "loss": 0.6562, "step": 6545, "task_loss": 0.359212726354599 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4646756947040558, "epoch": 5.53, "learning_rate": 2.4814501737578662e-05, "loss": 0.7019, "step": 6546, "task_loss": 0.7566305994987488 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5524226427078247, "epoch": 5.53, "learning_rate": 2.480980557903635e-05, "loss": 1.1367, "step": 6547, "task_loss": 0.037734489887952805 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.9199973940849304, "epoch": 5.53, "learning_rate": 2.480510942049404e-05, "loss": 0.929, "step": 6548, "task_loss": 0.3293412923812866 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7145240306854248, "epoch": 5.54, "learning_rate": 2.4800413261951725e-05, "loss": 0.9385, "step": 6549, "task_loss": 1.1084599494934082 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.1709107160568237, "epoch": 5.54, "learning_rate": 2.4795717103409415e-05, "loss": 0.9251, "step": 6550, "task_loss": 1.1088004112243652 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7485247254371643, "epoch": 5.54, "learning_rate": 2.47910209448671e-05, "loss": 0.7682, "step": 6551, "task_loss": 0.815598726272583 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6231138706207275, "epoch": 5.54, "learning_rate": 2.4786324786324787e-05, "loss": 0.9299, "step": 6552, "task_loss": 1.2794914245605469 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7162492871284485, "epoch": 5.54, "learning_rate": 2.4781628627782474e-05, "loss": 0.9879, "step": 6553, "task_loss": 0.5725975632667542 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.915233314037323, "epoch": 5.54, "learning_rate": 2.4776932469240163e-05, "loss": 1.1898, "step": 6554, "task_loss": 1.13390052318573 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.9578989744186401, "epoch": 5.54, "learning_rate": 2.477223631069785e-05, "loss": 0.8763, "step": 6555, "task_loss": 0.8744196891784668 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5787204504013062, "epoch": 5.54, "learning_rate": 2.476754015215554e-05, "loss": 0.9306, "step": 6556, "task_loss": 0.6602758765220642 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5607211589813232, "epoch": 5.54, "learning_rate": 2.4762843993613226e-05, "loss": 0.7748, "step": 6557, "task_loss": 1.2263411283493042 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.1356621980667114, "epoch": 5.54, "learning_rate": 2.4758147835070912e-05, "loss": 1.0768, "step": 6558, "task_loss": 1.7146135568618774 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.689918041229248, "epoch": 5.54, "learning_rate": 2.47534516765286e-05, "loss": 0.8194, "step": 6559, "task_loss": 0.4123992323875427 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4333256781101227, "epoch": 5.54, "learning_rate": 2.4748755517986288e-05, "loss": 0.7273, "step": 6560, "task_loss": 0.5912057757377625 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.2082090377807617, "epoch": 5.55, "learning_rate": 2.4744059359443978e-05, "loss": 1.0529, "step": 6561, "task_loss": 1.2178003787994385 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.495344877243042, "epoch": 5.55, "learning_rate": 2.4739363200901664e-05, "loss": 1.1026, "step": 6562, "task_loss": 1.2726677656173706 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.785637617111206, "epoch": 5.55, "learning_rate": 2.4734667042359354e-05, "loss": 0.7171, "step": 6563, "task_loss": 0.5261896848678589 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5374695658683777, "epoch": 5.55, "learning_rate": 2.4729970883817037e-05, "loss": 1.0461, "step": 6564, "task_loss": 0.7869671583175659 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.035266637802124, "epoch": 5.55, "learning_rate": 2.4725274725274727e-05, "loss": 0.9079, "step": 6565, "task_loss": 1.838982105255127 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.878603994846344, "epoch": 5.55, "learning_rate": 2.4720578566732413e-05, "loss": 0.9438, "step": 6566, "task_loss": 1.0689141750335693 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.1558685302734375, "epoch": 5.55, "learning_rate": 2.4715882408190103e-05, "loss": 0.8151, "step": 6567, "task_loss": 0.493290513753891 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6507989168167114, "epoch": 5.55, "learning_rate": 2.471118624964779e-05, "loss": 0.9318, "step": 6568, "task_loss": 1.5130853652954102 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.9253194332122803, "epoch": 5.55, "learning_rate": 2.470649009110548e-05, "loss": 1.2166, "step": 6569, "task_loss": 0.41528743505477905 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.4330897331237793, "epoch": 5.55, "learning_rate": 2.4701793932563165e-05, "loss": 0.8243, "step": 6570, "task_loss": 2.011565685272217 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.149446964263916, "epoch": 5.55, "learning_rate": 2.469709777402085e-05, "loss": 0.9914, "step": 6571, "task_loss": 1.4145113229751587 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.0650615692138672, "epoch": 5.56, "learning_rate": 2.4692401615478538e-05, "loss": 0.9216, "step": 6572, "task_loss": 0.7961815595626831 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.0481393337249756, "epoch": 5.56, "learning_rate": 2.4687705456936227e-05, "loss": 0.8509, "step": 6573, "task_loss": 0.45247191190719604 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.0091030597686768, "epoch": 5.56, "learning_rate": 2.4683009298393914e-05, "loss": 0.926, "step": 6574, "task_loss": 1.0393537282943726 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.0851750373840332, "epoch": 5.56, "learning_rate": 2.4678313139851604e-05, "loss": 0.8587, "step": 6575, "task_loss": 1.1208674907684326 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.9263315796852112, "epoch": 5.56, "learning_rate": 2.467361698130929e-05, "loss": 1.138, "step": 6576, "task_loss": 0.6099129319190979 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.2647969722747803, "epoch": 5.56, "learning_rate": 2.4668920822766976e-05, "loss": 0.9265, "step": 6577, "task_loss": 1.2099357843399048 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.0278624296188354, "epoch": 5.56, "learning_rate": 2.4664224664224666e-05, "loss": 0.966, "step": 6578, "task_loss": 1.6882303953170776 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.40831315517425537, "epoch": 5.56, "learning_rate": 2.4659528505682352e-05, "loss": 0.6916, "step": 6579, "task_loss": 0.1509709507226944 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.8878034353256226, "epoch": 5.56, "learning_rate": 2.4654832347140042e-05, "loss": 0.788, "step": 6580, "task_loss": 1.2449076175689697 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6834591627120972, "epoch": 5.56, "learning_rate": 2.465013618859773e-05, "loss": 0.8149, "step": 6581, "task_loss": 0.617369532585144 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.0745313167572021, "epoch": 5.56, "learning_rate": 2.4645440030055418e-05, "loss": 0.9187, "step": 6582, "task_loss": 2.049734592437744 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.0423119068145752, "epoch": 5.56, "learning_rate": 2.4640743871513104e-05, "loss": 1.0125, "step": 6583, "task_loss": 0.963222086429596 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.712994396686554, "epoch": 5.57, "learning_rate": 2.463604771297079e-05, "loss": 0.7283, "step": 6584, "task_loss": 0.749672532081604 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.8948954939842224, "epoch": 5.57, "learning_rate": 2.4631351554428477e-05, "loss": 0.7689, "step": 6585, "task_loss": 1.0379952192306519 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7258709669113159, "epoch": 5.57, "learning_rate": 2.4626655395886167e-05, "loss": 0.9312, "step": 6586, "task_loss": 0.9954637885093689 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.9688941240310669, "epoch": 5.57, "learning_rate": 2.4621959237343853e-05, "loss": 0.8991, "step": 6587, "task_loss": 0.9115315675735474 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5612451434135437, "epoch": 5.57, "learning_rate": 2.4617263078801543e-05, "loss": 0.7929, "step": 6588, "task_loss": 0.45403170585632324 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7728501558303833, "epoch": 5.57, "learning_rate": 2.461256692025923e-05, "loss": 0.8073, "step": 6589, "task_loss": 0.8065351843833923 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.9504995346069336, "epoch": 5.57, "learning_rate": 2.4607870761716916e-05, "loss": 0.9986, "step": 6590, "task_loss": 1.0132135152816772 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.986470639705658, "epoch": 5.57, "learning_rate": 2.4603174603174602e-05, "loss": 1.0332, "step": 6591, "task_loss": 0.8361361026763916 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5902015566825867, "epoch": 5.57, "learning_rate": 2.459847844463229e-05, "loss": 0.7914, "step": 6592, "task_loss": 0.7981234192848206 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.8932263851165771, "epoch": 5.57, "learning_rate": 2.459378228608998e-05, "loss": 0.9073, "step": 6593, "task_loss": 0.43015536665916443 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.9800308346748352, "epoch": 5.57, "learning_rate": 2.4589086127547668e-05, "loss": 1.1785, "step": 6594, "task_loss": 1.1460130214691162 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.5378355979919434, "epoch": 5.57, "learning_rate": 2.4584389969005357e-05, "loss": 1.2178, "step": 6595, "task_loss": 1.2029228210449219 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 2.266120672225952, "epoch": 5.58, "learning_rate": 2.457969381046304e-05, "loss": 1.3393, "step": 6596, "task_loss": 1.5405868291854858 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7765187621116638, "epoch": 5.58, "learning_rate": 2.457499765192073e-05, "loss": 0.8651, "step": 6597, "task_loss": 0.1597270369529724 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6172748804092407, "epoch": 5.58, "learning_rate": 2.4570301493378416e-05, "loss": 0.828, "step": 6598, "task_loss": 0.2836361825466156 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.8816103339195251, "epoch": 5.58, "learning_rate": 2.4565605334836106e-05, "loss": 0.9148, "step": 6599, "task_loss": 0.6980390548706055 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.9197078347206116, "epoch": 5.58, "learning_rate": 2.4560909176293793e-05, "loss": 0.9036, "step": 6600, "task_loss": 1.1312909126281738 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.1042873859405518, "epoch": 5.58, "learning_rate": 2.4556213017751482e-05, "loss": 0.9931, "step": 6601, "task_loss": 1.4244621992111206 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6930620074272156, "epoch": 5.58, "learning_rate": 2.455151685920917e-05, "loss": 0.8476, "step": 6602, "task_loss": 0.7543149590492249 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.3054134845733643, "epoch": 5.58, "learning_rate": 2.4546820700666855e-05, "loss": 0.9814, "step": 6603, "task_loss": 1.3226145505905151 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.2858620882034302, "epoch": 5.58, "learning_rate": 2.454212454212454e-05, "loss": 1.0481, "step": 6604, "task_loss": 0.666312038898468 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6937057971954346, "epoch": 5.58, "learning_rate": 2.453742838358223e-05, "loss": 0.7125, "step": 6605, "task_loss": 0.8389298915863037 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.41905003786087036, "epoch": 5.58, "learning_rate": 2.4532732225039917e-05, "loss": 0.5738, "step": 6606, "task_loss": 0.39687731862068176 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6398493051528931, "epoch": 5.58, "learning_rate": 2.4528036066497607e-05, "loss": 1.0518, "step": 6607, "task_loss": 0.8782630562782288 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.0880212783813477, "epoch": 5.59, "learning_rate": 2.4523339907955293e-05, "loss": 0.9483, "step": 6608, "task_loss": 0.9946067333221436 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.5420563220977783, "epoch": 5.59, "learning_rate": 2.451864374941298e-05, "loss": 1.106, "step": 6609, "task_loss": 1.0222811698913574 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.8119205832481384, "epoch": 5.59, "learning_rate": 2.451394759087067e-05, "loss": 0.8444, "step": 6610, "task_loss": 1.0791847705841064 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5991459488868713, "epoch": 5.59, "learning_rate": 2.4509251432328356e-05, "loss": 0.9086, "step": 6611, "task_loss": 0.8332309722900391 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.817090630531311, "epoch": 5.59, "learning_rate": 2.4504555273786046e-05, "loss": 0.8717, "step": 6612, "task_loss": 1.6698169708251953 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.9613997936248779, "epoch": 5.59, "learning_rate": 2.4499859115243732e-05, "loss": 1.0437, "step": 6613, "task_loss": 0.5155096650123596 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.8208536505699158, "epoch": 5.59, "learning_rate": 2.449516295670142e-05, "loss": 0.8153, "step": 6614, "task_loss": 0.4769150912761688 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.44721049070358276, "epoch": 5.59, "learning_rate": 2.4490466798159105e-05, "loss": 0.9333, "step": 6615, "task_loss": 0.7820694446563721 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.1996583938598633, "epoch": 5.59, "learning_rate": 2.4485770639616794e-05, "loss": 1.1367, "step": 6616, "task_loss": 2.040287494659424 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.9728130102157593, "epoch": 5.59, "learning_rate": 2.448107448107448e-05, "loss": 0.7319, "step": 6617, "task_loss": 1.1617364883422852 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.855895459651947, "epoch": 5.59, "learning_rate": 2.447637832253217e-05, "loss": 1.0211, "step": 6618, "task_loss": 1.2099485397338867 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.9172717332839966, "epoch": 5.59, "learning_rate": 2.4471682163989857e-05, "loss": 1.062, "step": 6619, "task_loss": 1.65121328830719 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.8879387378692627, "epoch": 5.6, "learning_rate": 2.4466986005447546e-05, "loss": 1.0185, "step": 6620, "task_loss": 1.0745211839675903 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.8266237378120422, "epoch": 5.6, "learning_rate": 2.4462289846905233e-05, "loss": 1.0461, "step": 6621, "task_loss": 1.063925862312317 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.630460262298584, "epoch": 5.6, "learning_rate": 2.445759368836292e-05, "loss": 1.1333, "step": 6622, "task_loss": 1.3138689994812012 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6859695315361023, "epoch": 5.6, "learning_rate": 2.445289752982061e-05, "loss": 0.8091, "step": 6623, "task_loss": 1.1032859086990356 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.946871280670166, "epoch": 5.6, "learning_rate": 2.4448201371278295e-05, "loss": 0.8633, "step": 6624, "task_loss": 1.0336421728134155 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6470715403556824, "epoch": 5.6, "learning_rate": 2.4443505212735985e-05, "loss": 0.8356, "step": 6625, "task_loss": 0.251172810792923 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.907500147819519, "epoch": 5.6, "learning_rate": 2.443880905419367e-05, "loss": 1.2698, "step": 6626, "task_loss": 1.0850331783294678 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.136962652206421, "epoch": 5.6, "learning_rate": 2.443411289565136e-05, "loss": 0.766, "step": 6627, "task_loss": 1.5016676187515259 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.8989626169204712, "epoch": 5.6, "learning_rate": 2.4429416737109044e-05, "loss": 0.9549, "step": 6628, "task_loss": 0.7360947132110596 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5806529521942139, "epoch": 5.6, "learning_rate": 2.4424720578566734e-05, "loss": 0.7508, "step": 6629, "task_loss": 0.7200555801391602 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.8136186599731445, "epoch": 5.6, "learning_rate": 2.442002442002442e-05, "loss": 0.8487, "step": 6630, "task_loss": 0.47695600986480713 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.581135094165802, "epoch": 5.6, "learning_rate": 2.441532826148211e-05, "loss": 0.8187, "step": 6631, "task_loss": 1.2110118865966797 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.250044822692871, "epoch": 5.61, "learning_rate": 2.4410632102939796e-05, "loss": 0.9441, "step": 6632, "task_loss": 1.0270390510559082 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7059073448181152, "epoch": 5.61, "learning_rate": 2.4405935944397486e-05, "loss": 1.2302, "step": 6633, "task_loss": 1.1635041236877441 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.909400224685669, "epoch": 5.61, "learning_rate": 2.4401239785855172e-05, "loss": 1.2041, "step": 6634, "task_loss": 0.7382453680038452 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.537803590297699, "epoch": 5.61, "learning_rate": 2.439654362731286e-05, "loss": 0.7804, "step": 6635, "task_loss": 0.351135790348053 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 2.058112621307373, "epoch": 5.61, "learning_rate": 2.4391847468770545e-05, "loss": 1.1022, "step": 6636, "task_loss": 1.5273407697677612 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7188575863838196, "epoch": 5.61, "learning_rate": 2.4387151310228235e-05, "loss": 0.9305, "step": 6637, "task_loss": 0.5073179602622986 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.355460286140442, "epoch": 5.61, "learning_rate": 2.4382455151685924e-05, "loss": 1.1358, "step": 6638, "task_loss": 1.515930414199829 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.3793998956680298, "epoch": 5.61, "learning_rate": 2.437775899314361e-05, "loss": 0.9288, "step": 6639, "task_loss": 0.08861976861953735 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5557202696800232, "epoch": 5.61, "learning_rate": 2.4373062834601297e-05, "loss": 0.9369, "step": 6640, "task_loss": 0.7935031652450562 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.9381681084632874, "epoch": 5.61, "learning_rate": 2.4368366676058983e-05, "loss": 0.9423, "step": 6641, "task_loss": 0.820948600769043 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6585814952850342, "epoch": 5.61, "learning_rate": 2.4363670517516673e-05, "loss": 0.8706, "step": 6642, "task_loss": 0.47166207432746887 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7500200271606445, "epoch": 5.61, "learning_rate": 2.435897435897436e-05, "loss": 1.0745, "step": 6643, "task_loss": 0.8711224794387817 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7233695387840271, "epoch": 5.62, "learning_rate": 2.435427820043205e-05, "loss": 0.9104, "step": 6644, "task_loss": 0.5659667253494263 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.8588014245033264, "epoch": 5.62, "learning_rate": 2.4349582041889735e-05, "loss": 1.1046, "step": 6645, "task_loss": 0.7058823108673096 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6325877904891968, "epoch": 5.62, "learning_rate": 2.4344885883347425e-05, "loss": 0.8342, "step": 6646, "task_loss": 0.1982627958059311 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5055789947509766, "epoch": 5.62, "learning_rate": 2.4340189724805108e-05, "loss": 0.8033, "step": 6647, "task_loss": 0.6870417594909668 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.86025071144104, "epoch": 5.62, "learning_rate": 2.4335493566262798e-05, "loss": 0.935, "step": 6648, "task_loss": 0.7079517841339111 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.2837390899658203, "epoch": 5.62, "learning_rate": 2.4330797407720484e-05, "loss": 0.9875, "step": 6649, "task_loss": 0.7962954044342041 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.8910945653915405, "epoch": 5.62, "learning_rate": 2.4326101249178174e-05, "loss": 0.8498, "step": 6650, "task_loss": 1.0536521673202515 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.671379566192627, "epoch": 5.62, "learning_rate": 2.432140509063586e-05, "loss": 0.7696, "step": 6651, "task_loss": 0.8075859546661377 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7907781600952148, "epoch": 5.62, "learning_rate": 2.431670893209355e-05, "loss": 0.7466, "step": 6652, "task_loss": 1.156365156173706 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.0830881595611572, "epoch": 5.62, "learning_rate": 2.4312012773551236e-05, "loss": 1.0515, "step": 6653, "task_loss": 1.0267937183380127 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.833695113658905, "epoch": 5.62, "learning_rate": 2.4307316615008923e-05, "loss": 0.6692, "step": 6654, "task_loss": 0.7679073214530945 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.1598137617111206, "epoch": 5.63, "learning_rate": 2.4302620456466612e-05, "loss": 0.9369, "step": 6655, "task_loss": 1.5609127283096313 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.8662961721420288, "epoch": 5.63, "learning_rate": 2.42979242979243e-05, "loss": 0.791, "step": 6656, "task_loss": 1.138088583946228 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.0670026540756226, "epoch": 5.63, "learning_rate": 2.429322813938199e-05, "loss": 0.9298, "step": 6657, "task_loss": 1.2662304639816284 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.373207449913025, "epoch": 5.63, "learning_rate": 2.4288531980839675e-05, "loss": 1.1095, "step": 6658, "task_loss": 0.8894612789154053 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.786376953125, "epoch": 5.63, "learning_rate": 2.428383582229736e-05, "loss": 0.959, "step": 6659, "task_loss": 0.3115834593772888 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.1645961999893188, "epoch": 5.63, "learning_rate": 2.4279139663755047e-05, "loss": 0.9098, "step": 6660, "task_loss": 1.06203293800354 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6678271293640137, "epoch": 5.63, "learning_rate": 2.4274443505212737e-05, "loss": 0.8207, "step": 6661, "task_loss": 1.1759073734283447 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.0283564329147339, "epoch": 5.63, "learning_rate": 2.4269747346670424e-05, "loss": 1.0949, "step": 6662, "task_loss": 1.5555089712142944 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7841814756393433, "epoch": 5.63, "learning_rate": 2.4265051188128113e-05, "loss": 0.7616, "step": 6663, "task_loss": 0.640498697757721 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5832818746566772, "epoch": 5.63, "learning_rate": 2.42603550295858e-05, "loss": 0.7594, "step": 6664, "task_loss": 0.8962552547454834 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.1064479351043701, "epoch": 5.63, "learning_rate": 2.425565887104349e-05, "loss": 1.021, "step": 6665, "task_loss": 0.8834772109985352 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6273853778839111, "epoch": 5.63, "learning_rate": 2.4250962712501176e-05, "loss": 0.726, "step": 6666, "task_loss": 0.6214941740036011 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.9151614904403687, "epoch": 5.64, "learning_rate": 2.4246266553958862e-05, "loss": 0.9807, "step": 6667, "task_loss": 1.1253281831741333 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.9365876317024231, "epoch": 5.64, "learning_rate": 2.424157039541655e-05, "loss": 0.9357, "step": 6668, "task_loss": 0.24669693410396576 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.0428639650344849, "epoch": 5.64, "learning_rate": 2.4236874236874238e-05, "loss": 0.8066, "step": 6669, "task_loss": 0.5238329172134399 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.1816184520721436, "epoch": 5.64, "learning_rate": 2.4232178078331928e-05, "loss": 1.3074, "step": 6670, "task_loss": 0.8739794492721558 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4579183757305145, "epoch": 5.64, "learning_rate": 2.4227481919789614e-05, "loss": 0.6383, "step": 6671, "task_loss": 0.6071368455886841 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.847671627998352, "epoch": 5.64, "learning_rate": 2.42227857612473e-05, "loss": 0.7871, "step": 6672, "task_loss": 0.7136431932449341 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.100651502609253, "epoch": 5.64, "learning_rate": 2.4218089602704987e-05, "loss": 1.2206, "step": 6673, "task_loss": 1.5671916007995605 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.0612430572509766, "epoch": 5.64, "learning_rate": 2.4213393444162677e-05, "loss": 0.9326, "step": 6674, "task_loss": 0.807921290397644 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5986188650131226, "epoch": 5.64, "learning_rate": 2.4208697285620363e-05, "loss": 0.7661, "step": 6675, "task_loss": 0.17334291338920593 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6017254590988159, "epoch": 5.64, "learning_rate": 2.4204001127078053e-05, "loss": 0.7954, "step": 6676, "task_loss": 0.4023938775062561 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.9478189945220947, "epoch": 5.64, "learning_rate": 2.419930496853574e-05, "loss": 0.7301, "step": 6677, "task_loss": 0.9096418619155884 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.42318636178970337, "epoch": 5.64, "learning_rate": 2.419460880999343e-05, "loss": 0.76, "step": 6678, "task_loss": 0.27015188336372375 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7596333622932434, "epoch": 5.65, "learning_rate": 2.418991265145111e-05, "loss": 0.875, "step": 6679, "task_loss": 0.662571907043457 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.2702337503433228, "epoch": 5.65, "learning_rate": 2.41852164929088e-05, "loss": 1.1335, "step": 6680, "task_loss": 1.0464636087417603 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6935229301452637, "epoch": 5.65, "learning_rate": 2.4180520334366488e-05, "loss": 0.7341, "step": 6681, "task_loss": 0.10915953665971756 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4639397859573364, "epoch": 5.65, "learning_rate": 2.4175824175824177e-05, "loss": 0.5897, "step": 6682, "task_loss": 0.2648518681526184 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5799527168273926, "epoch": 5.65, "learning_rate": 2.4171128017281864e-05, "loss": 0.8275, "step": 6683, "task_loss": 0.5818865895271301 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.3729114532470703, "epoch": 5.65, "learning_rate": 2.4166431858739553e-05, "loss": 0.9814, "step": 6684, "task_loss": 0.7873285412788391 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.1515474319458008, "epoch": 5.65, "learning_rate": 2.416173570019724e-05, "loss": 0.9354, "step": 6685, "task_loss": 0.9992050528526306 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.9275329113006592, "epoch": 5.65, "learning_rate": 2.4157039541654926e-05, "loss": 0.9854, "step": 6686, "task_loss": 1.4290953874588013 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4899478852748871, "epoch": 5.65, "learning_rate": 2.4152343383112616e-05, "loss": 0.6192, "step": 6687, "task_loss": 0.3014602065086365 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.1952579021453857, "epoch": 5.65, "learning_rate": 2.4147647224570302e-05, "loss": 1.0752, "step": 6688, "task_loss": 1.1045126914978027 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.698963463306427, "epoch": 5.65, "learning_rate": 2.4142951066027992e-05, "loss": 0.666, "step": 6689, "task_loss": 1.2605646848678589 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7402207851409912, "epoch": 5.65, "learning_rate": 2.413825490748568e-05, "loss": 0.6763, "step": 6690, "task_loss": 1.1705387830734253 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.9535470008850098, "epoch": 5.66, "learning_rate": 2.4133558748943365e-05, "loss": 1.3241, "step": 6691, "task_loss": 1.0142521858215332 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.1447124481201172, "epoch": 5.66, "learning_rate": 2.412886259040105e-05, "loss": 0.9807, "step": 6692, "task_loss": 1.9657164812088013 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.0589673519134521, "epoch": 5.66, "learning_rate": 2.412416643185874e-05, "loss": 1.0214, "step": 6693, "task_loss": 1.2299724817276 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.9088494777679443, "epoch": 5.66, "learning_rate": 2.4119470273316427e-05, "loss": 0.8565, "step": 6694, "task_loss": 0.5997495055198669 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.142212152481079, "epoch": 5.66, "learning_rate": 2.4114774114774117e-05, "loss": 1.0995, "step": 6695, "task_loss": 1.0152781009674072 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.1859357357025146, "epoch": 5.66, "learning_rate": 2.4110077956231803e-05, "loss": 1.0325, "step": 6696, "task_loss": 0.87417072057724 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.9592379331588745, "epoch": 5.66, "learning_rate": 2.4105381797689493e-05, "loss": 0.8754, "step": 6697, "task_loss": 0.7784700989723206 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.9023374319076538, "epoch": 5.66, "learning_rate": 2.4100685639147176e-05, "loss": 0.9607, "step": 6698, "task_loss": 0.25220412015914917 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7597769498825073, "epoch": 5.66, "learning_rate": 2.4095989480604866e-05, "loss": 0.7456, "step": 6699, "task_loss": 1.1861598491668701 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.2931573390960693, "epoch": 5.66, "learning_rate": 2.4091293322062555e-05, "loss": 0.9023, "step": 6700, "task_loss": 1.8444111347198486 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6457901000976562, "epoch": 5.66, "learning_rate": 2.408659716352024e-05, "loss": 0.7163, "step": 6701, "task_loss": 0.7411329746246338 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.874161958694458, "epoch": 5.66, "learning_rate": 2.408190100497793e-05, "loss": 0.7032, "step": 6702, "task_loss": 0.5570341348648071 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.0381759405136108, "epoch": 5.67, "learning_rate": 2.4077204846435618e-05, "loss": 0.8266, "step": 6703, "task_loss": 1.0522780418395996 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.0730299949645996, "epoch": 5.67, "learning_rate": 2.4072508687893304e-05, "loss": 0.9997, "step": 6704, "task_loss": 1.4148147106170654 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.285895824432373, "epoch": 5.67, "learning_rate": 2.406781252935099e-05, "loss": 1.201, "step": 6705, "task_loss": 2.1042075157165527 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7462339401245117, "epoch": 5.67, "learning_rate": 2.406311637080868e-05, "loss": 0.67, "step": 6706, "task_loss": 0.7871127128601074 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.8379177451133728, "epoch": 5.67, "learning_rate": 2.4058420212266366e-05, "loss": 0.7469, "step": 6707, "task_loss": 1.5637456178665161 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6633034944534302, "epoch": 5.67, "learning_rate": 2.4053724053724056e-05, "loss": 0.8865, "step": 6708, "task_loss": 0.5655769109725952 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.9467108249664307, "epoch": 5.67, "learning_rate": 2.4049027895181742e-05, "loss": 0.9702, "step": 6709, "task_loss": 0.8540567755699158 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.8440371751785278, "epoch": 5.67, "learning_rate": 2.404433173663943e-05, "loss": 0.7798, "step": 6710, "task_loss": 0.8622531294822693 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.9724523425102234, "epoch": 5.67, "learning_rate": 2.4039635578097115e-05, "loss": 1.1839, "step": 6711, "task_loss": 0.9750789403915405 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7582234144210815, "epoch": 5.67, "learning_rate": 2.4034939419554805e-05, "loss": 0.757, "step": 6712, "task_loss": 0.29026368260383606 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.48764902353286743, "epoch": 5.67, "learning_rate": 2.403024326101249e-05, "loss": 0.8076, "step": 6713, "task_loss": 0.5419002175331116 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.2826939821243286, "epoch": 5.67, "learning_rate": 2.402554710247018e-05, "loss": 0.9619, "step": 6714, "task_loss": 1.2972105741500854 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6389666795730591, "epoch": 5.68, "learning_rate": 2.402085094392787e-05, "loss": 0.8879, "step": 6715, "task_loss": 0.6819944381713867 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7594606280326843, "epoch": 5.68, "learning_rate": 2.4016154785385557e-05, "loss": 0.8273, "step": 6716, "task_loss": 0.23025716841220856 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.803584098815918, "epoch": 5.68, "learning_rate": 2.4011458626843243e-05, "loss": 0.8802, "step": 6717, "task_loss": 0.6110880970954895 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.8747059106826782, "epoch": 5.68, "learning_rate": 2.400676246830093e-05, "loss": 0.7512, "step": 6718, "task_loss": 2.2715461254119873 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.910496711730957, "epoch": 5.68, "learning_rate": 2.400206630975862e-05, "loss": 1.2916, "step": 6719, "task_loss": 0.9776650667190552 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.2206528186798096, "epoch": 5.68, "learning_rate": 2.3997370151216306e-05, "loss": 0.8925, "step": 6720, "task_loss": 1.4328557252883911 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.4446865320205688, "epoch": 5.68, "learning_rate": 2.3992673992673995e-05, "loss": 0.964, "step": 6721, "task_loss": 0.685194194316864 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.8352596759796143, "epoch": 5.68, "learning_rate": 2.3987977834131682e-05, "loss": 0.8212, "step": 6722, "task_loss": 0.4034329056739807 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.5897613763809204, "epoch": 5.68, "learning_rate": 2.3983281675589368e-05, "loss": 1.0465, "step": 6723, "task_loss": 2.1436948776245117 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.0675944089889526, "epoch": 5.68, "learning_rate": 2.3978585517047055e-05, "loss": 1.0278, "step": 6724, "task_loss": 0.9114589691162109 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7282688617706299, "epoch": 5.68, "learning_rate": 2.3973889358504744e-05, "loss": 0.9645, "step": 6725, "task_loss": 0.628159761428833 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.9330127835273743, "epoch": 5.69, "learning_rate": 2.396919319996243e-05, "loss": 0.8942, "step": 6726, "task_loss": 0.9269289970397949 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.4653854370117188, "epoch": 5.69, "learning_rate": 2.396449704142012e-05, "loss": 1.269, "step": 6727, "task_loss": 1.2264471054077148 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.2153143882751465, "epoch": 5.69, "learning_rate": 2.3959800882877807e-05, "loss": 0.9267, "step": 6728, "task_loss": 1.8929495811462402 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5065398216247559, "epoch": 5.69, "learning_rate": 2.3955104724335496e-05, "loss": 0.8302, "step": 6729, "task_loss": 0.41992226243019104 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6887391209602356, "epoch": 5.69, "learning_rate": 2.3950408565793183e-05, "loss": 0.6423, "step": 6730, "task_loss": 1.3406347036361694 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6049679517745972, "epoch": 5.69, "learning_rate": 2.394571240725087e-05, "loss": 0.7939, "step": 6731, "task_loss": 1.6558846235275269 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.8503549695014954, "epoch": 5.69, "learning_rate": 2.394101624870856e-05, "loss": 1.1088, "step": 6732, "task_loss": 1.0449090003967285 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.720858097076416, "epoch": 5.69, "learning_rate": 2.3936320090166245e-05, "loss": 0.7858, "step": 6733, "task_loss": 0.2571253478527069 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.8401659727096558, "epoch": 5.69, "learning_rate": 2.3931623931623935e-05, "loss": 0.8951, "step": 6734, "task_loss": 1.1766583919525146 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.918848991394043, "epoch": 5.69, "learning_rate": 2.392692777308162e-05, "loss": 0.8965, "step": 6735, "task_loss": 0.5514716506004333 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.053659200668335, "epoch": 5.69, "learning_rate": 2.3922231614539308e-05, "loss": 0.9142, "step": 6736, "task_loss": 1.1920145750045776 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.0580832958221436, "epoch": 5.69, "learning_rate": 2.3917535455996994e-05, "loss": 0.7459, "step": 6737, "task_loss": 0.7092434167861938 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.0503007173538208, "epoch": 5.7, "learning_rate": 2.3912839297454684e-05, "loss": 0.8874, "step": 6738, "task_loss": 1.0656776428222656 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.0154904127120972, "epoch": 5.7, "learning_rate": 2.390814313891237e-05, "loss": 0.9125, "step": 6739, "task_loss": 0.33276402950286865 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.805359959602356, "epoch": 5.7, "learning_rate": 2.390344698037006e-05, "loss": 0.8591, "step": 6740, "task_loss": 0.34107446670532227 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7013375163078308, "epoch": 5.7, "learning_rate": 2.3898750821827746e-05, "loss": 0.6753, "step": 6741, "task_loss": 1.3797718286514282 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.9709469079971313, "epoch": 5.7, "learning_rate": 2.3894054663285432e-05, "loss": 1.2308, "step": 6742, "task_loss": 1.1232008934020996 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.8366460204124451, "epoch": 5.7, "learning_rate": 2.388935850474312e-05, "loss": 0.9826, "step": 6743, "task_loss": 1.3507962226867676 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6966295838356018, "epoch": 5.7, "learning_rate": 2.388466234620081e-05, "loss": 0.8546, "step": 6744, "task_loss": 1.15293550491333 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.9821621179580688, "epoch": 5.7, "learning_rate": 2.3879966187658495e-05, "loss": 0.9458, "step": 6745, "task_loss": 1.179903268814087 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.782833456993103, "epoch": 5.7, "learning_rate": 2.3875270029116184e-05, "loss": 0.9911, "step": 6746, "task_loss": 1.1119225025177002 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.1905227899551392, "epoch": 5.7, "learning_rate": 2.3870573870573874e-05, "loss": 1.0834, "step": 6747, "task_loss": 0.3836309611797333 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5821097493171692, "epoch": 5.7, "learning_rate": 2.386587771203156e-05, "loss": 0.9759, "step": 6748, "task_loss": 0.610830545425415 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.991465151309967, "epoch": 5.7, "learning_rate": 2.3861181553489247e-05, "loss": 0.7552, "step": 6749, "task_loss": 1.2862426042556763 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.753917932510376, "epoch": 5.71, "learning_rate": 2.3856485394946933e-05, "loss": 0.6696, "step": 6750, "task_loss": 0.2988802194595337 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.49491411447525024, "epoch": 5.71, "learning_rate": 2.3851789236404623e-05, "loss": 0.6805, "step": 6751, "task_loss": 0.892216682434082 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.0529417991638184, "epoch": 5.71, "learning_rate": 2.384709307786231e-05, "loss": 0.7675, "step": 6752, "task_loss": 0.973456621170044 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5247900485992432, "epoch": 5.71, "learning_rate": 2.384239691932e-05, "loss": 0.8147, "step": 6753, "task_loss": 0.55158531665802 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.9365881085395813, "epoch": 5.71, "learning_rate": 2.3837700760777685e-05, "loss": 0.8407, "step": 6754, "task_loss": 2.148561477661133 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6395298838615417, "epoch": 5.71, "learning_rate": 2.3833004602235372e-05, "loss": 0.9224, "step": 6755, "task_loss": 1.1203320026397705 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5307620763778687, "epoch": 5.71, "learning_rate": 2.3828308443693058e-05, "loss": 0.829, "step": 6756, "task_loss": 0.4411768913269043 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.2598950862884521, "epoch": 5.71, "learning_rate": 2.3823612285150748e-05, "loss": 0.9772, "step": 6757, "task_loss": 1.4861348867416382 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.612754225730896, "epoch": 5.71, "learning_rate": 2.3818916126608434e-05, "loss": 0.767, "step": 6758, "task_loss": 0.7962609529495239 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.657630205154419, "epoch": 5.71, "learning_rate": 2.3814219968066124e-05, "loss": 0.7489, "step": 6759, "task_loss": 0.8585364818572998 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.9311295747756958, "epoch": 5.71, "learning_rate": 2.380952380952381e-05, "loss": 0.8249, "step": 6760, "task_loss": 1.1434255838394165 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.8541572690010071, "epoch": 5.71, "learning_rate": 2.38048276509815e-05, "loss": 1.03, "step": 6761, "task_loss": 0.6732721328735352 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6410472393035889, "epoch": 5.72, "learning_rate": 2.3800131492439186e-05, "loss": 0.7643, "step": 6762, "task_loss": 0.4496767520904541 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.8636772632598877, "epoch": 5.72, "learning_rate": 2.3795435333896873e-05, "loss": 0.7582, "step": 6763, "task_loss": 0.48120084404945374 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5913612246513367, "epoch": 5.72, "learning_rate": 2.3790739175354562e-05, "loss": 0.9875, "step": 6764, "task_loss": 0.9333608150482178 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.0523264408111572, "epoch": 5.72, "learning_rate": 2.378604301681225e-05, "loss": 1.1798, "step": 6765, "task_loss": 1.005062460899353 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.055492639541626, "epoch": 5.72, "learning_rate": 2.378134685826994e-05, "loss": 1.1007, "step": 6766, "task_loss": 1.5288063287734985 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.0697102546691895, "epoch": 5.72, "learning_rate": 2.3776650699727625e-05, "loss": 1.1148, "step": 6767, "task_loss": 0.9422531723976135 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.9807431697845459, "epoch": 5.72, "learning_rate": 2.377195454118531e-05, "loss": 0.9831, "step": 6768, "task_loss": 1.160199761390686 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.8231852054595947, "epoch": 5.72, "learning_rate": 2.3767258382642997e-05, "loss": 1.1457, "step": 6769, "task_loss": 1.8833708763122559 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7812897562980652, "epoch": 5.72, "learning_rate": 2.3762562224100687e-05, "loss": 0.8156, "step": 6770, "task_loss": 0.5552233457565308 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6236609816551208, "epoch": 5.72, "learning_rate": 2.3757866065558373e-05, "loss": 0.9773, "step": 6771, "task_loss": 0.7118287682533264 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6649678945541382, "epoch": 5.72, "learning_rate": 2.3753169907016063e-05, "loss": 0.8662, "step": 6772, "task_loss": 0.9807431697845459 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4607981741428375, "epoch": 5.72, "learning_rate": 2.374847374847375e-05, "loss": 0.8819, "step": 6773, "task_loss": 1.773616075515747 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6728079319000244, "epoch": 5.73, "learning_rate": 2.3743777589931436e-05, "loss": 0.7518, "step": 6774, "task_loss": 0.49786239862442017 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.098536491394043, "epoch": 5.73, "learning_rate": 2.3739081431389122e-05, "loss": 0.9188, "step": 6775, "task_loss": 1.0983518362045288 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7141021490097046, "epoch": 5.73, "learning_rate": 2.3734385272846812e-05, "loss": 0.7367, "step": 6776, "task_loss": 0.7454456090927124 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.47558853030204773, "epoch": 5.73, "learning_rate": 2.37296891143045e-05, "loss": 0.4233, "step": 6777, "task_loss": 0.14606861770153046 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4589689075946808, "epoch": 5.73, "learning_rate": 2.3724992955762188e-05, "loss": 0.637, "step": 6778, "task_loss": 0.22938290238380432 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5932426452636719, "epoch": 5.73, "learning_rate": 2.3720296797219878e-05, "loss": 0.7512, "step": 6779, "task_loss": 1.210940957069397 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5168956518173218, "epoch": 5.73, "learning_rate": 2.3715600638677564e-05, "loss": 0.7626, "step": 6780, "task_loss": 0.5314469933509827 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.8436493873596191, "epoch": 5.73, "learning_rate": 2.371090448013525e-05, "loss": 0.9492, "step": 6781, "task_loss": 0.9849116206169128 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7337509989738464, "epoch": 5.73, "learning_rate": 2.3706208321592937e-05, "loss": 0.935, "step": 6782, "task_loss": 0.5635011196136475 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.8422363996505737, "epoch": 5.73, "learning_rate": 2.3701512163050626e-05, "loss": 0.8883, "step": 6783, "task_loss": 0.5890029072761536 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.8110880255699158, "epoch": 5.73, "learning_rate": 2.3696816004508313e-05, "loss": 0.8629, "step": 6784, "task_loss": 0.8420389890670776 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.9593644142150879, "epoch": 5.73, "learning_rate": 2.3692119845966003e-05, "loss": 0.8809, "step": 6785, "task_loss": 0.5771307945251465 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7364098429679871, "epoch": 5.74, "learning_rate": 2.368742368742369e-05, "loss": 0.7436, "step": 6786, "task_loss": 1.1267242431640625 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5847415924072266, "epoch": 5.74, "learning_rate": 2.3682727528881375e-05, "loss": 0.9291, "step": 6787, "task_loss": 0.6240774989128113 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.9620779752731323, "epoch": 5.74, "learning_rate": 2.367803137033906e-05, "loss": 0.885, "step": 6788, "task_loss": 1.0169920921325684 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5182778835296631, "epoch": 5.74, "learning_rate": 2.367333521179675e-05, "loss": 0.7864, "step": 6789, "task_loss": 0.4089384973049164 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.773504912853241, "epoch": 5.74, "learning_rate": 2.3668639053254438e-05, "loss": 0.7423, "step": 6790, "task_loss": 0.7762398719787598 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.699677050113678, "epoch": 5.74, "learning_rate": 2.3663942894712127e-05, "loss": 0.9109, "step": 6791, "task_loss": 1.1919825077056885 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.0993223190307617, "epoch": 5.74, "learning_rate": 2.3659246736169817e-05, "loss": 1.2199, "step": 6792, "task_loss": 1.123986005783081 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.4528881311416626, "epoch": 5.74, "learning_rate": 2.36545505776275e-05, "loss": 0.897, "step": 6793, "task_loss": 2.1795127391815186 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.1702656745910645, "epoch": 5.74, "learning_rate": 2.364985441908519e-05, "loss": 0.9933, "step": 6794, "task_loss": 0.4359816908836365 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.756558895111084, "epoch": 5.74, "learning_rate": 2.3645158260542876e-05, "loss": 0.8728, "step": 6795, "task_loss": 1.057492971420288 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.0622364282608032, "epoch": 5.74, "learning_rate": 2.3640462102000566e-05, "loss": 0.9923, "step": 6796, "task_loss": 0.888116180896759 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.0495953559875488, "epoch": 5.75, "learning_rate": 2.3635765943458252e-05, "loss": 0.9764, "step": 6797, "task_loss": 0.8294357061386108 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.9675319790840149, "epoch": 5.75, "learning_rate": 2.3631069784915942e-05, "loss": 0.8055, "step": 6798, "task_loss": 0.4885249435901642 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.3587136268615723, "epoch": 5.75, "learning_rate": 2.3626373626373628e-05, "loss": 1.1005, "step": 6799, "task_loss": 0.7753101587295532 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.37446045875549316, "epoch": 5.75, "learning_rate": 2.3621677467831315e-05, "loss": 0.6066, "step": 6800, "task_loss": 0.49950113892555237 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.6154265403747559, "epoch": 5.75, "learning_rate": 2.3616981309289e-05, "loss": 1.1312, "step": 6801, "task_loss": 1.5039540529251099 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.6585464477539062, "epoch": 5.75, "learning_rate": 2.361228515074669e-05, "loss": 0.9463, "step": 6802, "task_loss": 1.507667899131775 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.2881836891174316, "epoch": 5.75, "learning_rate": 2.3607588992204377e-05, "loss": 0.9524, "step": 6803, "task_loss": 1.1745299100875854 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.6048994064331055, "epoch": 5.75, "learning_rate": 2.3602892833662067e-05, "loss": 0.957, "step": 6804, "task_loss": 0.684712827205658 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7645369172096252, "epoch": 5.75, "learning_rate": 2.3598196675119753e-05, "loss": 1.0451, "step": 6805, "task_loss": 0.9328283071517944 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.8263797163963318, "epoch": 5.75, "learning_rate": 2.359350051657744e-05, "loss": 0.9066, "step": 6806, "task_loss": 1.1664538383483887 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5030562877655029, "epoch": 5.75, "learning_rate": 2.358880435803513e-05, "loss": 0.7478, "step": 6807, "task_loss": 0.46425676345825195 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.8725855350494385, "epoch": 5.75, "learning_rate": 2.3584108199492815e-05, "loss": 1.2244, "step": 6808, "task_loss": 0.9999521374702454 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6280463933944702, "epoch": 5.76, "learning_rate": 2.3579412040950505e-05, "loss": 0.7202, "step": 6809, "task_loss": 1.6380839347839355 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.1800111532211304, "epoch": 5.76, "learning_rate": 2.357471588240819e-05, "loss": 0.829, "step": 6810, "task_loss": 0.26093339920043945 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6319969892501831, "epoch": 5.76, "learning_rate": 2.357001972386588e-05, "loss": 0.781, "step": 6811, "task_loss": 1.0988578796386719 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6074014902114868, "epoch": 5.76, "learning_rate": 2.3565323565323568e-05, "loss": 0.7322, "step": 6812, "task_loss": 1.25213623046875 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5137218832969666, "epoch": 5.76, "learning_rate": 2.3560627406781254e-05, "loss": 0.8427, "step": 6813, "task_loss": 0.7499380707740784 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5483342409133911, "epoch": 5.76, "learning_rate": 2.355593124823894e-05, "loss": 0.7722, "step": 6814, "task_loss": 0.4077723026275635 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.9410679340362549, "epoch": 5.76, "learning_rate": 2.355123508969663e-05, "loss": 0.7725, "step": 6815, "task_loss": 0.9299442172050476 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6100887060165405, "epoch": 5.76, "learning_rate": 2.3546538931154316e-05, "loss": 0.7016, "step": 6816, "task_loss": 0.9085521101951599 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.216246485710144, "epoch": 5.76, "learning_rate": 2.3541842772612006e-05, "loss": 0.9952, "step": 6817, "task_loss": 1.888864278793335 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.212327480316162, "epoch": 5.76, "learning_rate": 2.3537146614069692e-05, "loss": 0.9393, "step": 6818, "task_loss": 1.422967791557312 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.9767873883247375, "epoch": 5.76, "learning_rate": 2.353245045552738e-05, "loss": 0.9512, "step": 6819, "task_loss": 1.1837917566299438 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.3333511352539062, "epoch": 5.76, "learning_rate": 2.3527754296985065e-05, "loss": 1.0178, "step": 6820, "task_loss": 1.4801969528198242 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.3590152859687805, "epoch": 5.77, "learning_rate": 2.3523058138442755e-05, "loss": 0.7483, "step": 6821, "task_loss": 0.2580345571041107 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4334285855293274, "epoch": 5.77, "learning_rate": 2.351836197990044e-05, "loss": 0.9249, "step": 6822, "task_loss": 0.2827914357185364 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.8378063440322876, "epoch": 5.77, "learning_rate": 2.351366582135813e-05, "loss": 1.2055, "step": 6823, "task_loss": 0.8619141578674316 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.9888297319412231, "epoch": 5.77, "learning_rate": 2.350896966281582e-05, "loss": 0.8761, "step": 6824, "task_loss": 0.5391133427619934 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.0114572048187256, "epoch": 5.77, "learning_rate": 2.3504273504273504e-05, "loss": 0.8212, "step": 6825, "task_loss": 1.0523393154144287 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.9178014397621155, "epoch": 5.77, "learning_rate": 2.3499577345731193e-05, "loss": 0.9809, "step": 6826, "task_loss": 1.1906602382659912 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.1491262912750244, "epoch": 5.77, "learning_rate": 2.349488118718888e-05, "loss": 0.9491, "step": 6827, "task_loss": 1.092125654220581 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.0226373672485352, "epoch": 5.77, "learning_rate": 2.349018502864657e-05, "loss": 0.9059, "step": 6828, "task_loss": 0.7144634127616882 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7359093427658081, "epoch": 5.77, "learning_rate": 2.3485488870104256e-05, "loss": 0.9656, "step": 6829, "task_loss": 0.8184313774108887 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.3813645839691162, "epoch": 5.77, "learning_rate": 2.3480792711561945e-05, "loss": 1.061, "step": 6830, "task_loss": 0.4989403486251831 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7664267420768738, "epoch": 5.77, "learning_rate": 2.3476096553019632e-05, "loss": 0.7546, "step": 6831, "task_loss": 0.4185173213481903 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.3168002367019653, "epoch": 5.77, "learning_rate": 2.3471400394477318e-05, "loss": 0.8221, "step": 6832, "task_loss": 1.364028811454773 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6354924440383911, "epoch": 5.78, "learning_rate": 2.3466704235935004e-05, "loss": 1.1353, "step": 6833, "task_loss": 1.573555827140808 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.4167433977127075, "epoch": 5.78, "learning_rate": 2.3462008077392694e-05, "loss": 1.0049, "step": 6834, "task_loss": 1.2953133583068848 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.8627262115478516, "epoch": 5.78, "learning_rate": 2.345731191885038e-05, "loss": 0.9682, "step": 6835, "task_loss": 1.256417989730835 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6282433271408081, "epoch": 5.78, "learning_rate": 2.345261576030807e-05, "loss": 0.9248, "step": 6836, "task_loss": 0.7942405939102173 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.9341217279434204, "epoch": 5.78, "learning_rate": 2.3447919601765757e-05, "loss": 0.8284, "step": 6837, "task_loss": 1.022289514541626 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6974766254425049, "epoch": 5.78, "learning_rate": 2.3443223443223443e-05, "loss": 0.9916, "step": 6838, "task_loss": 0.4251866638660431 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.794503927230835, "epoch": 5.78, "learning_rate": 2.3438527284681133e-05, "loss": 0.6981, "step": 6839, "task_loss": 1.6146925687789917 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.5364608764648438, "epoch": 5.78, "learning_rate": 2.343383112613882e-05, "loss": 1.3108, "step": 6840, "task_loss": 1.0463383197784424 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.790054202079773, "epoch": 5.78, "learning_rate": 2.342913496759651e-05, "loss": 0.7953, "step": 6841, "task_loss": 0.5601029396057129 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.45132768154144287, "epoch": 5.78, "learning_rate": 2.3424438809054195e-05, "loss": 0.6887, "step": 6842, "task_loss": 1.015944242477417 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.4502911567687988, "epoch": 5.78, "learning_rate": 2.3419742650511885e-05, "loss": 1.1734, "step": 6843, "task_loss": 0.7939892411231995 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.400557518005371, "epoch": 5.78, "learning_rate": 2.341504649196957e-05, "loss": 0.9336, "step": 6844, "task_loss": 1.5174509286880493 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7225519418716431, "epoch": 5.79, "learning_rate": 2.3410350333427257e-05, "loss": 0.9157, "step": 6845, "task_loss": 0.35504353046417236 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.9297246336936951, "epoch": 5.79, "learning_rate": 2.3405654174884944e-05, "loss": 0.8967, "step": 6846, "task_loss": 0.3101593255996704 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.46276307106018066, "epoch": 5.79, "learning_rate": 2.3400958016342634e-05, "loss": 0.8509, "step": 6847, "task_loss": 0.7028865218162537 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5728351473808289, "epoch": 5.79, "learning_rate": 2.339626185780032e-05, "loss": 0.7822, "step": 6848, "task_loss": 0.909103274345398 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7553730010986328, "epoch": 5.79, "learning_rate": 2.339156569925801e-05, "loss": 0.6899, "step": 6849, "task_loss": 1.172672152519226 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.802759051322937, "epoch": 5.79, "learning_rate": 2.3386869540715696e-05, "loss": 0.793, "step": 6850, "task_loss": 0.621513843536377 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7995235323905945, "epoch": 5.79, "learning_rate": 2.3382173382173382e-05, "loss": 0.8495, "step": 6851, "task_loss": 1.2424572706222534 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.8351410627365112, "epoch": 5.79, "learning_rate": 2.337747722363107e-05, "loss": 1.0225, "step": 6852, "task_loss": 0.7646808624267578 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.8923661708831787, "epoch": 5.79, "learning_rate": 2.337278106508876e-05, "loss": 1.0041, "step": 6853, "task_loss": 1.52045476436615 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.8104562759399414, "epoch": 5.79, "learning_rate": 2.3368084906546448e-05, "loss": 0.9341, "step": 6854, "task_loss": 0.9462757706642151 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7566956281661987, "epoch": 5.79, "learning_rate": 2.3363388748004134e-05, "loss": 0.6267, "step": 6855, "task_loss": 0.8107412457466125 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.8587989807128906, "epoch": 5.79, "learning_rate": 2.3358692589461824e-05, "loss": 0.8467, "step": 6856, "task_loss": 0.7658548951148987 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6538493037223816, "epoch": 5.8, "learning_rate": 2.3353996430919507e-05, "loss": 0.758, "step": 6857, "task_loss": 1.0989794731140137 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7427317500114441, "epoch": 5.8, "learning_rate": 2.3349300272377197e-05, "loss": 0.8188, "step": 6858, "task_loss": 0.7333974242210388 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.442085325717926, "epoch": 5.8, "learning_rate": 2.3344604113834883e-05, "loss": 0.6523, "step": 6859, "task_loss": 0.8253942131996155 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.8534146547317505, "epoch": 5.8, "learning_rate": 2.3339907955292573e-05, "loss": 0.7179, "step": 6860, "task_loss": 1.7735497951507568 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.843344509601593, "epoch": 5.8, "learning_rate": 2.333521179675026e-05, "loss": 0.7314, "step": 6861, "task_loss": 0.6097232699394226 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7665247321128845, "epoch": 5.8, "learning_rate": 2.333051563820795e-05, "loss": 0.8157, "step": 6862, "task_loss": 1.3952053785324097 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6893059611320496, "epoch": 5.8, "learning_rate": 2.3325819479665635e-05, "loss": 0.7709, "step": 6863, "task_loss": 0.6930266618728638 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.8679988384246826, "epoch": 5.8, "learning_rate": 2.332112332112332e-05, "loss": 0.7166, "step": 6864, "task_loss": 1.130811095237732 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.8586546182632446, "epoch": 5.8, "learning_rate": 2.3316427162581008e-05, "loss": 0.927, "step": 6865, "task_loss": 1.284659504890442 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6050385236740112, "epoch": 5.8, "learning_rate": 2.3311731004038698e-05, "loss": 0.8173, "step": 6866, "task_loss": 0.7318169474601746 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5167334675788879, "epoch": 5.8, "learning_rate": 2.3307034845496384e-05, "loss": 0.8118, "step": 6867, "task_loss": 0.256539523601532 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5412167310714722, "epoch": 5.81, "learning_rate": 2.3302338686954074e-05, "loss": 0.7207, "step": 6868, "task_loss": 0.5017980933189392 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7587090730667114, "epoch": 5.81, "learning_rate": 2.329764252841176e-05, "loss": 0.8699, "step": 6869, "task_loss": 1.1228703260421753 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.4404726028442383, "epoch": 5.81, "learning_rate": 2.3292946369869446e-05, "loss": 0.8959, "step": 6870, "task_loss": 1.045318365097046 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.686195969581604, "epoch": 5.81, "learning_rate": 2.3288250211327136e-05, "loss": 0.8004, "step": 6871, "task_loss": 1.61308753490448 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.8011695146560669, "epoch": 5.81, "learning_rate": 2.3283554052784823e-05, "loss": 0.7128, "step": 6872, "task_loss": 1.1039596796035767 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.2391188144683838, "epoch": 5.81, "learning_rate": 2.3278857894242512e-05, "loss": 0.9571, "step": 6873, "task_loss": 0.7356581687927246 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6594183444976807, "epoch": 5.81, "learning_rate": 2.32741617357002e-05, "loss": 0.9206, "step": 6874, "task_loss": 0.14400354027748108 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7619233727455139, "epoch": 5.81, "learning_rate": 2.326946557715789e-05, "loss": 0.9263, "step": 6875, "task_loss": 0.6382659077644348 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.1585818529129028, "epoch": 5.81, "learning_rate": 2.326476941861557e-05, "loss": 0.8938, "step": 6876, "task_loss": 0.783816933631897 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6864654421806335, "epoch": 5.81, "learning_rate": 2.326007326007326e-05, "loss": 0.9365, "step": 6877, "task_loss": 0.9610595107078552 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6200267672538757, "epoch": 5.81, "learning_rate": 2.3255377101530947e-05, "loss": 0.8179, "step": 6878, "task_loss": 0.4812156558036804 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.1050633192062378, "epoch": 5.81, "learning_rate": 2.3250680942988637e-05, "loss": 1.4977, "step": 6879, "task_loss": 1.0830085277557373 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.8092394471168518, "epoch": 5.82, "learning_rate": 2.3245984784446323e-05, "loss": 0.9317, "step": 6880, "task_loss": 0.405548095703125 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.8841133713722229, "epoch": 5.82, "learning_rate": 2.3241288625904013e-05, "loss": 0.9087, "step": 6881, "task_loss": 1.1318182945251465 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.007925033569336, "epoch": 5.82, "learning_rate": 2.32365924673617e-05, "loss": 0.9969, "step": 6882, "task_loss": 1.354547381401062 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7686929106712341, "epoch": 5.82, "learning_rate": 2.3231896308819386e-05, "loss": 0.7524, "step": 6883, "task_loss": 0.5223163366317749 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.9769868850708008, "epoch": 5.82, "learning_rate": 2.3227200150277076e-05, "loss": 0.6363, "step": 6884, "task_loss": 1.4073272943496704 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.8222090005874634, "epoch": 5.82, "learning_rate": 2.3222503991734762e-05, "loss": 0.7308, "step": 6885, "task_loss": 0.21077287197113037 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6233669519424438, "epoch": 5.82, "learning_rate": 2.321780783319245e-05, "loss": 0.7602, "step": 6886, "task_loss": 0.2762865722179413 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.3236417770385742, "epoch": 5.82, "learning_rate": 2.3213111674650138e-05, "loss": 0.9728, "step": 6887, "task_loss": 0.9814698696136475 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5462210178375244, "epoch": 5.82, "learning_rate": 2.3208415516107824e-05, "loss": 0.5563, "step": 6888, "task_loss": 0.37993213534355164 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.0004702806472778, "epoch": 5.82, "learning_rate": 2.320371935756551e-05, "loss": 0.8634, "step": 6889, "task_loss": 0.5098750591278076 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.1138341426849365, "epoch": 5.82, "learning_rate": 2.31990231990232e-05, "loss": 0.8875, "step": 6890, "task_loss": 1.359745979309082 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6399946212768555, "epoch": 5.82, "learning_rate": 2.3194327040480887e-05, "loss": 0.8642, "step": 6891, "task_loss": 0.4864305853843689 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.8141530156135559, "epoch": 5.83, "learning_rate": 2.3189630881938576e-05, "loss": 0.7493, "step": 6892, "task_loss": 0.2087172418832779 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.0358107089996338, "epoch": 5.83, "learning_rate": 2.3184934723396263e-05, "loss": 0.9869, "step": 6893, "task_loss": 0.7740412354469299 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4288710951805115, "epoch": 5.83, "learning_rate": 2.3180238564853952e-05, "loss": 0.7663, "step": 6894, "task_loss": 1.085727334022522 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5555062294006348, "epoch": 5.83, "learning_rate": 2.317554240631164e-05, "loss": 0.6862, "step": 6895, "task_loss": 0.3498213291168213 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.8729539513587952, "epoch": 5.83, "learning_rate": 2.3170846247769325e-05, "loss": 1.1404, "step": 6896, "task_loss": 1.4903252124786377 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.754063606262207, "epoch": 5.83, "learning_rate": 2.316615008922701e-05, "loss": 0.7722, "step": 6897, "task_loss": 1.448262095451355 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.3116471767425537, "epoch": 5.83, "learning_rate": 2.31614539306847e-05, "loss": 0.978, "step": 6898, "task_loss": 0.3612949550151825 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.36440032720565796, "epoch": 5.83, "learning_rate": 2.3156757772142388e-05, "loss": 0.6397, "step": 6899, "task_loss": 0.4426412880420685 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.3543827533721924, "epoch": 5.83, "learning_rate": 2.3152061613600077e-05, "loss": 0.846, "step": 6900, "task_loss": 1.5030003786087036 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7986416220664978, "epoch": 5.83, "learning_rate": 2.3147365455057764e-05, "loss": 0.7467, "step": 6901, "task_loss": 1.2942898273468018 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.0500165224075317, "epoch": 5.83, "learning_rate": 2.314266929651545e-05, "loss": 0.979, "step": 6902, "task_loss": 0.6424713730812073 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5613407492637634, "epoch": 5.83, "learning_rate": 2.313797313797314e-05, "loss": 0.8224, "step": 6903, "task_loss": 0.665805995464325 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5366293787956238, "epoch": 5.84, "learning_rate": 2.3133276979430826e-05, "loss": 0.8141, "step": 6904, "task_loss": 1.5167591571807861 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5052111148834229, "epoch": 5.84, "learning_rate": 2.3128580820888516e-05, "loss": 0.6873, "step": 6905, "task_loss": 1.0663334131240845 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7718274593353271, "epoch": 5.84, "learning_rate": 2.3123884662346202e-05, "loss": 0.6698, "step": 6906, "task_loss": 0.8493602871894836 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.1375393867492676, "epoch": 5.84, "learning_rate": 2.3119188503803892e-05, "loss": 1.0143, "step": 6907, "task_loss": 0.6042356491088867 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.8886361122131348, "epoch": 5.84, "learning_rate": 2.3114492345261575e-05, "loss": 0.875, "step": 6908, "task_loss": 1.5783445835113525 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7825520038604736, "epoch": 5.84, "learning_rate": 2.3109796186719265e-05, "loss": 0.8119, "step": 6909, "task_loss": 0.7991464138031006 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.9669448137283325, "epoch": 5.84, "learning_rate": 2.310510002817695e-05, "loss": 0.8918, "step": 6910, "task_loss": 1.1243325471878052 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.9332430958747864, "epoch": 5.84, "learning_rate": 2.310040386963464e-05, "loss": 0.7433, "step": 6911, "task_loss": 0.773442804813385 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7388519644737244, "epoch": 5.84, "learning_rate": 2.3095707711092327e-05, "loss": 0.7363, "step": 6912, "task_loss": 0.5961018800735474 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.9556235074996948, "epoch": 5.84, "learning_rate": 2.3091011552550017e-05, "loss": 0.7352, "step": 6913, "task_loss": 1.1430150270462036 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.0854153633117676, "epoch": 5.84, "learning_rate": 2.3086315394007703e-05, "loss": 0.859, "step": 6914, "task_loss": 1.4004640579223633 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6225603818893433, "epoch": 5.84, "learning_rate": 2.308161923546539e-05, "loss": 0.7295, "step": 6915, "task_loss": 0.7262918949127197 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.9057717323303223, "epoch": 5.85, "learning_rate": 2.307692307692308e-05, "loss": 0.8642, "step": 6916, "task_loss": 0.670978844165802 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.881833016872406, "epoch": 5.85, "learning_rate": 2.3072226918380765e-05, "loss": 0.7499, "step": 6917, "task_loss": 0.6133262515068054 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.8066921830177307, "epoch": 5.85, "learning_rate": 2.3067530759838455e-05, "loss": 0.8389, "step": 6918, "task_loss": 1.5226454734802246 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.0238455533981323, "epoch": 5.85, "learning_rate": 2.306283460129614e-05, "loss": 0.8864, "step": 6919, "task_loss": 0.8819732069969177 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.1692633628845215, "epoch": 5.85, "learning_rate": 2.3058138442753828e-05, "loss": 0.9533, "step": 6920, "task_loss": 1.7323793172836304 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.8660577535629272, "epoch": 5.85, "learning_rate": 2.3053442284211514e-05, "loss": 0.7999, "step": 6921, "task_loss": 0.6514772772789001 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6388611793518066, "epoch": 5.85, "learning_rate": 2.3048746125669204e-05, "loss": 0.7827, "step": 6922, "task_loss": 0.610383927822113 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.9167336225509644, "epoch": 5.85, "learning_rate": 2.304404996712689e-05, "loss": 0.7704, "step": 6923, "task_loss": 0.6501918435096741 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5288246870040894, "epoch": 5.85, "learning_rate": 2.303935380858458e-05, "loss": 0.8486, "step": 6924, "task_loss": 0.47539663314819336 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5187451839447021, "epoch": 5.85, "learning_rate": 2.3034657650042266e-05, "loss": 0.6627, "step": 6925, "task_loss": 0.30219000577926636 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6163839101791382, "epoch": 5.85, "learning_rate": 2.3029961491499956e-05, "loss": 0.9476, "step": 6926, "task_loss": 0.6096378564834595 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.0600165128707886, "epoch": 5.85, "learning_rate": 2.302526533295764e-05, "loss": 0.9849, "step": 6927, "task_loss": 0.7635249495506287 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.0966877937316895, "epoch": 5.86, "learning_rate": 2.302056917441533e-05, "loss": 0.9959, "step": 6928, "task_loss": 0.8137834072113037 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.8586665391921997, "epoch": 5.86, "learning_rate": 2.3015873015873015e-05, "loss": 1.0013, "step": 6929, "task_loss": 0.980431079864502 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5253503918647766, "epoch": 5.86, "learning_rate": 2.3011176857330705e-05, "loss": 0.9951, "step": 6930, "task_loss": 1.20150887966156 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6390697956085205, "epoch": 5.86, "learning_rate": 2.3006480698788394e-05, "loss": 0.7622, "step": 6931, "task_loss": 1.7462464570999146 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.3124889135360718, "epoch": 5.86, "learning_rate": 2.300178454024608e-05, "loss": 0.984, "step": 6932, "task_loss": 1.8247573375701904 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.3460902273654938, "epoch": 5.86, "learning_rate": 2.2997088381703767e-05, "loss": 0.5183, "step": 6933, "task_loss": 0.6855204701423645 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.8788442015647888, "epoch": 5.86, "learning_rate": 2.2992392223161454e-05, "loss": 0.7912, "step": 6934, "task_loss": 1.0054244995117188 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.8220540881156921, "epoch": 5.86, "learning_rate": 2.2987696064619143e-05, "loss": 0.6779, "step": 6935, "task_loss": 1.8817849159240723 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.4200230836868286, "epoch": 5.86, "learning_rate": 2.298299990607683e-05, "loss": 1.1267, "step": 6936, "task_loss": 0.8536819815635681 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7493687868118286, "epoch": 5.86, "learning_rate": 2.297830374753452e-05, "loss": 0.8365, "step": 6937, "task_loss": 0.6214563250541687 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7632887363433838, "epoch": 5.86, "learning_rate": 2.2973607588992206e-05, "loss": 0.8551, "step": 6938, "task_loss": 0.12873922288417816 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.9851712584495544, "epoch": 5.87, "learning_rate": 2.2968911430449895e-05, "loss": 0.94, "step": 6939, "task_loss": 0.7460455298423767 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7070729732513428, "epoch": 5.87, "learning_rate": 2.296421527190758e-05, "loss": 0.7423, "step": 6940, "task_loss": 0.782521665096283 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.3316031694412231, "epoch": 5.87, "learning_rate": 2.2959519113365268e-05, "loss": 1.0013, "step": 6941, "task_loss": 0.610973596572876 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.8394127488136292, "epoch": 5.87, "learning_rate": 2.2954822954822954e-05, "loss": 0.7113, "step": 6942, "task_loss": 0.39955854415893555 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.8137984275817871, "epoch": 5.87, "learning_rate": 2.2950126796280644e-05, "loss": 0.9763, "step": 6943, "task_loss": 1.2447491884231567 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.8746053576469421, "epoch": 5.87, "learning_rate": 2.294543063773833e-05, "loss": 0.8439, "step": 6944, "task_loss": 0.381888747215271 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7498089671134949, "epoch": 5.87, "learning_rate": 2.294073447919602e-05, "loss": 0.8196, "step": 6945, "task_loss": 1.0139275789260864 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6087511777877808, "epoch": 5.87, "learning_rate": 2.2936038320653707e-05, "loss": 0.7155, "step": 6946, "task_loss": 0.14695659279823303 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7072761058807373, "epoch": 5.87, "learning_rate": 2.2931342162111393e-05, "loss": 0.9015, "step": 6947, "task_loss": 0.6744276285171509 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.137140154838562, "epoch": 5.87, "learning_rate": 2.2926646003569083e-05, "loss": 0.8566, "step": 6948, "task_loss": 1.8221889734268188 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.45189398527145386, "epoch": 5.87, "learning_rate": 2.292194984502677e-05, "loss": 0.6702, "step": 6949, "task_loss": 0.5211673974990845 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6708605289459229, "epoch": 5.87, "learning_rate": 2.291725368648446e-05, "loss": 0.9045, "step": 6950, "task_loss": 0.5179941654205322 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.9778361320495605, "epoch": 5.88, "learning_rate": 2.2912557527942145e-05, "loss": 1.005, "step": 6951, "task_loss": 1.316859245300293 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.8110535144805908, "epoch": 5.88, "learning_rate": 2.290786136939983e-05, "loss": 0.8943, "step": 6952, "task_loss": 0.9041056632995605 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.8837547898292542, "epoch": 5.88, "learning_rate": 2.2903165210857518e-05, "loss": 0.7391, "step": 6953, "task_loss": 1.5919286012649536 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7427752017974854, "epoch": 5.88, "learning_rate": 2.2898469052315207e-05, "loss": 0.7419, "step": 6954, "task_loss": 0.2517172396183014 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.9190841913223267, "epoch": 5.88, "learning_rate": 2.2893772893772894e-05, "loss": 0.6895, "step": 6955, "task_loss": 0.6325318217277527 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.8149527311325073, "epoch": 5.88, "learning_rate": 2.2889076735230583e-05, "loss": 1.0495, "step": 6956, "task_loss": 1.2683438062667847 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.2799642086029053, "epoch": 5.88, "learning_rate": 2.288438057668827e-05, "loss": 1.1165, "step": 6957, "task_loss": 1.1726783514022827 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.586638867855072, "epoch": 5.88, "learning_rate": 2.287968441814596e-05, "loss": 0.979, "step": 6958, "task_loss": 0.6093842387199402 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.122387409210205, "epoch": 5.88, "learning_rate": 2.2874988259603642e-05, "loss": 0.9169, "step": 6959, "task_loss": 0.9772908687591553 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.8854989409446716, "epoch": 5.88, "learning_rate": 2.2870292101061332e-05, "loss": 0.7951, "step": 6960, "task_loss": 0.8777256608009338 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.874219536781311, "epoch": 5.88, "learning_rate": 2.286559594251902e-05, "loss": 0.8117, "step": 6961, "task_loss": 0.7840709090232849 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.8511516451835632, "epoch": 5.88, "learning_rate": 2.2860899783976708e-05, "loss": 0.8148, "step": 6962, "task_loss": 0.33824560046195984 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6783667206764221, "epoch": 5.89, "learning_rate": 2.2856203625434398e-05, "loss": 0.778, "step": 6963, "task_loss": 1.1324853897094727 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5567435622215271, "epoch": 5.89, "learning_rate": 2.2851507466892084e-05, "loss": 0.5621, "step": 6964, "task_loss": 0.49745652079582214 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.8053522109985352, "epoch": 5.89, "learning_rate": 2.284681130834977e-05, "loss": 1.0254, "step": 6965, "task_loss": 0.5220735669136047 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.3973289132118225, "epoch": 5.89, "learning_rate": 2.2842115149807457e-05, "loss": 0.9998, "step": 6966, "task_loss": 0.6074799299240112 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.9571936130523682, "epoch": 5.89, "learning_rate": 2.2837418991265147e-05, "loss": 0.9284, "step": 6967, "task_loss": 0.9362512826919556 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.3906312882900238, "epoch": 5.89, "learning_rate": 2.2832722832722833e-05, "loss": 0.7246, "step": 6968, "task_loss": 0.7789096832275391 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.057280421257019, "epoch": 5.89, "learning_rate": 2.2828026674180523e-05, "loss": 1.2006, "step": 6969, "task_loss": 1.5188665390014648 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5446982383728027, "epoch": 5.89, "learning_rate": 2.282333051563821e-05, "loss": 0.7341, "step": 6970, "task_loss": 0.2737044394016266 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.0223560333251953, "epoch": 5.89, "learning_rate": 2.2818634357095896e-05, "loss": 0.8467, "step": 6971, "task_loss": 0.8495752215385437 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.728813111782074, "epoch": 5.89, "learning_rate": 2.2813938198553582e-05, "loss": 0.8262, "step": 6972, "task_loss": 0.8715047836303711 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7089194655418396, "epoch": 5.89, "learning_rate": 2.280924204001127e-05, "loss": 0.6547, "step": 6973, "task_loss": 1.1652207374572754 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.0218008756637573, "epoch": 5.89, "learning_rate": 2.2804545881468958e-05, "loss": 0.8641, "step": 6974, "task_loss": 1.442299485206604 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.8833197951316833, "epoch": 5.9, "learning_rate": 2.2799849722926648e-05, "loss": 0.7985, "step": 6975, "task_loss": 1.2060737609863281 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6676265597343445, "epoch": 5.9, "learning_rate": 2.2795153564384334e-05, "loss": 0.8664, "step": 6976, "task_loss": 0.7294397354125977 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7121256589889526, "epoch": 5.9, "learning_rate": 2.2790457405842024e-05, "loss": 0.9169, "step": 6977, "task_loss": 0.4984844923019409 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.73046875, "epoch": 5.9, "learning_rate": 2.278576124729971e-05, "loss": 1.1071, "step": 6978, "task_loss": 1.3300596475601196 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6954641342163086, "epoch": 5.9, "learning_rate": 2.2781065088757396e-05, "loss": 0.867, "step": 6979, "task_loss": 0.41550078988075256 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.0882480144500732, "epoch": 5.9, "learning_rate": 2.2776368930215086e-05, "loss": 1.027, "step": 6980, "task_loss": 1.1319646835327148 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.9222243428230286, "epoch": 5.9, "learning_rate": 2.2771672771672772e-05, "loss": 0.8067, "step": 6981, "task_loss": 1.798500657081604 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5047727823257446, "epoch": 5.9, "learning_rate": 2.2766976613130462e-05, "loss": 0.6644, "step": 6982, "task_loss": 0.4798748791217804 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7138422727584839, "epoch": 5.9, "learning_rate": 2.276228045458815e-05, "loss": 1.0872, "step": 6983, "task_loss": 1.0998626947402954 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6942945718765259, "epoch": 5.9, "learning_rate": 2.2757584296045835e-05, "loss": 0.8228, "step": 6984, "task_loss": 1.0934557914733887 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.9101066589355469, "epoch": 5.9, "learning_rate": 2.275288813750352e-05, "loss": 0.7848, "step": 6985, "task_loss": 0.5921005010604858 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5204304456710815, "epoch": 5.9, "learning_rate": 2.274819197896121e-05, "loss": 0.6916, "step": 6986, "task_loss": 1.2465870380401611 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.9243582487106323, "epoch": 5.91, "learning_rate": 2.2743495820418897e-05, "loss": 0.8488, "step": 6987, "task_loss": 1.042914867401123 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7971748113632202, "epoch": 5.91, "learning_rate": 2.2738799661876587e-05, "loss": 0.7783, "step": 6988, "task_loss": 0.983962893486023 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7548261880874634, "epoch": 5.91, "learning_rate": 2.2734103503334273e-05, "loss": 0.7227, "step": 6989, "task_loss": 0.329174667596817 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.8483241200447083, "epoch": 5.91, "learning_rate": 2.2729407344791963e-05, "loss": 0.9124, "step": 6990, "task_loss": 2.1100800037384033 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.9797222018241882, "epoch": 5.91, "learning_rate": 2.2724711186249646e-05, "loss": 0.8651, "step": 6991, "task_loss": 1.416909098625183 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7715015411376953, "epoch": 5.91, "learning_rate": 2.2720015027707336e-05, "loss": 0.9823, "step": 6992, "task_loss": 1.918108344078064 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7222669720649719, "epoch": 5.91, "learning_rate": 2.2715318869165025e-05, "loss": 0.8228, "step": 6993, "task_loss": 0.8216475248336792 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.2220818996429443, "epoch": 5.91, "learning_rate": 2.2710622710622712e-05, "loss": 1.1323, "step": 6994, "task_loss": 1.4086518287658691 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.0232889652252197, "epoch": 5.91, "learning_rate": 2.27059265520804e-05, "loss": 0.8367, "step": 6995, "task_loss": 0.6310581564903259 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.8945904970169067, "epoch": 5.91, "learning_rate": 2.2701230393538088e-05, "loss": 1.0898, "step": 6996, "task_loss": 0.9773446321487427 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.8529220223426819, "epoch": 5.91, "learning_rate": 2.2696534234995774e-05, "loss": 0.8647, "step": 6997, "task_loss": 0.6171911358833313 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.1637630462646484, "epoch": 5.91, "learning_rate": 2.269183807645346e-05, "loss": 0.8855, "step": 6998, "task_loss": 1.7974998950958252 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.1789970397949219, "epoch": 5.92, "learning_rate": 2.268714191791115e-05, "loss": 0.8982, "step": 6999, "task_loss": 1.2158973217010498 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.683456301689148, "epoch": 5.92, "learning_rate": 2.2682445759368837e-05, "loss": 0.7951, "step": 7000, "task_loss": 0.6065704822540283 }, { "epoch": 5.92, "eval_accuracy": 0.8838415841584158, "eval_loss": 0.5150585770606995, "eval_runtime": 224.5706, "eval_samples_per_second": 112.437, "eval_steps_per_second": 0.882, "step": 7000 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.9588413238525391, "epoch": 5.92, "learning_rate": 2.2677749600826526e-05, "loss": 0.7176, "step": 7001, "task_loss": 1.2816747426986694 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.3691787719726562, "epoch": 5.92, "learning_rate": 2.2673053442284213e-05, "loss": 0.8769, "step": 7002, "task_loss": 1.5047285556793213 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.800286054611206, "epoch": 5.92, "learning_rate": 2.26683572837419e-05, "loss": 0.7655, "step": 7003, "task_loss": 0.6300666928291321 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.3300762176513672, "epoch": 5.92, "learning_rate": 2.2663661125199585e-05, "loss": 0.9512, "step": 7004, "task_loss": 1.9579286575317383 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.8971841335296631, "epoch": 5.92, "learning_rate": 2.2658964966657275e-05, "loss": 0.9581, "step": 7005, "task_loss": 0.2554986774921417 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6385400295257568, "epoch": 5.92, "learning_rate": 2.265426880811496e-05, "loss": 0.8054, "step": 7006, "task_loss": 0.2977989614009857 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6598544120788574, "epoch": 5.92, "learning_rate": 2.264957264957265e-05, "loss": 0.7568, "step": 7007, "task_loss": 0.6481936573982239 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.788580596446991, "epoch": 5.92, "learning_rate": 2.264487649103034e-05, "loss": 0.7236, "step": 7008, "task_loss": 1.1272695064544678 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.9879619479179382, "epoch": 5.92, "learning_rate": 2.2640180332488027e-05, "loss": 0.8172, "step": 7009, "task_loss": 1.2341886758804321 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.9914407134056091, "epoch": 5.93, "learning_rate": 2.2635484173945714e-05, "loss": 0.7638, "step": 7010, "task_loss": 1.3693681955337524 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.9357978105545044, "epoch": 5.93, "learning_rate": 2.26307880154034e-05, "loss": 0.6903, "step": 7011, "task_loss": 1.2228977680206299 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.0779452323913574, "epoch": 5.93, "learning_rate": 2.262609185686109e-05, "loss": 0.8405, "step": 7012, "task_loss": 0.5646799206733704 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.4261345863342285, "epoch": 5.93, "learning_rate": 2.2621395698318776e-05, "loss": 0.9865, "step": 7013, "task_loss": 1.2686917781829834 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.8649443984031677, "epoch": 5.93, "learning_rate": 2.2616699539776466e-05, "loss": 0.8531, "step": 7014, "task_loss": 0.6419537663459778 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.9455982446670532, "epoch": 5.93, "learning_rate": 2.2612003381234152e-05, "loss": 1.1514, "step": 7015, "task_loss": 1.1854740381240845 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.8388601541519165, "epoch": 5.93, "learning_rate": 2.260730722269184e-05, "loss": 0.8679, "step": 7016, "task_loss": 1.5066797733306885 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.03610098361969, "epoch": 5.93, "learning_rate": 2.2602611064149525e-05, "loss": 0.7421, "step": 7017, "task_loss": 1.0045922994613647 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.8432217240333557, "epoch": 5.93, "learning_rate": 2.2597914905607214e-05, "loss": 0.8514, "step": 7018, "task_loss": 0.3781205117702484 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.47558140754699707, "epoch": 5.93, "learning_rate": 2.25932187470649e-05, "loss": 0.6619, "step": 7019, "task_loss": 0.15561926364898682 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7415368556976318, "epoch": 5.93, "learning_rate": 2.258852258852259e-05, "loss": 0.7535, "step": 7020, "task_loss": 0.9931349754333496 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.9540878534317017, "epoch": 5.93, "learning_rate": 2.2583826429980277e-05, "loss": 1.0486, "step": 7021, "task_loss": 1.3202139139175415 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7641855478286743, "epoch": 5.94, "learning_rate": 2.2579130271437963e-05, "loss": 0.7483, "step": 7022, "task_loss": 0.9984277486801147 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.765364408493042, "epoch": 5.94, "learning_rate": 2.2574434112895653e-05, "loss": 0.9977, "step": 7023, "task_loss": 1.194465160369873 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.3132104277610779, "epoch": 5.94, "learning_rate": 2.256973795435334e-05, "loss": 0.5363, "step": 7024, "task_loss": 0.4265490174293518 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.9456419944763184, "epoch": 5.94, "learning_rate": 2.256504179581103e-05, "loss": 0.9498, "step": 7025, "task_loss": 0.6996541619300842 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.217101812362671, "epoch": 5.94, "learning_rate": 2.2560345637268715e-05, "loss": 0.8973, "step": 7026, "task_loss": 0.5073807239532471 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.022989273071289, "epoch": 5.94, "learning_rate": 2.2555649478726405e-05, "loss": 0.903, "step": 7027, "task_loss": 1.0329394340515137 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4081493318080902, "epoch": 5.94, "learning_rate": 2.255095332018409e-05, "loss": 0.7252, "step": 7028, "task_loss": 0.6235049366950989 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7852829694747925, "epoch": 5.94, "learning_rate": 2.2546257161641778e-05, "loss": 0.7015, "step": 7029, "task_loss": 1.5458372831344604 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.3363259434700012, "epoch": 5.94, "learning_rate": 2.2541561003099464e-05, "loss": 0.6224, "step": 7030, "task_loss": 0.22201451659202576 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.8587237000465393, "epoch": 5.94, "learning_rate": 2.2536864844557154e-05, "loss": 0.7754, "step": 7031, "task_loss": 1.8809257745742798 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.9103831052780151, "epoch": 5.94, "learning_rate": 2.253216868601484e-05, "loss": 0.9604, "step": 7032, "task_loss": 1.038590431213379 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7720468044281006, "epoch": 5.94, "learning_rate": 2.252747252747253e-05, "loss": 0.8512, "step": 7033, "task_loss": 1.3782955408096313 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.1836183071136475, "epoch": 5.95, "learning_rate": 2.2522776368930216e-05, "loss": 0.7274, "step": 7034, "task_loss": 1.0465004444122314 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5925596952438354, "epoch": 5.95, "learning_rate": 2.2518080210387903e-05, "loss": 0.7852, "step": 7035, "task_loss": 0.697490930557251 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5514025688171387, "epoch": 5.95, "learning_rate": 2.251338405184559e-05, "loss": 0.6209, "step": 7036, "task_loss": 0.5737578868865967 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.017447590827942, "epoch": 5.95, "learning_rate": 2.250868789330328e-05, "loss": 1.0084, "step": 7037, "task_loss": 1.4770710468292236 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7246628999710083, "epoch": 5.95, "learning_rate": 2.2503991734760965e-05, "loss": 0.6628, "step": 7038, "task_loss": 1.2673052549362183 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.8188577890396118, "epoch": 5.95, "learning_rate": 2.2499295576218655e-05, "loss": 0.8567, "step": 7039, "task_loss": 2.572810649871826 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5957992076873779, "epoch": 5.95, "learning_rate": 2.2494599417676344e-05, "loss": 0.8747, "step": 7040, "task_loss": 0.18199940025806427 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.70896977186203, "epoch": 5.95, "learning_rate": 2.248990325913403e-05, "loss": 0.6841, "step": 7041, "task_loss": 0.46921682357788086 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.8619024753570557, "epoch": 5.95, "learning_rate": 2.2485207100591717e-05, "loss": 0.8581, "step": 7042, "task_loss": 0.3523638844490051 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.750055193901062, "epoch": 5.95, "learning_rate": 2.2480510942049403e-05, "loss": 0.9406, "step": 7043, "task_loss": 1.5775014162063599 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.8647449612617493, "epoch": 5.95, "learning_rate": 2.2475814783507093e-05, "loss": 0.9011, "step": 7044, "task_loss": 1.0744491815567017 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5677909851074219, "epoch": 5.95, "learning_rate": 2.247111862496478e-05, "loss": 0.7126, "step": 7045, "task_loss": 1.059313178062439 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6422876715660095, "epoch": 5.96, "learning_rate": 2.246642246642247e-05, "loss": 0.7657, "step": 7046, "task_loss": 0.8289316296577454 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6676558256149292, "epoch": 5.96, "learning_rate": 2.2461726307880156e-05, "loss": 0.8593, "step": 7047, "task_loss": 0.2909344434738159 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.48784780502319336, "epoch": 5.96, "learning_rate": 2.2457030149337842e-05, "loss": 0.6151, "step": 7048, "task_loss": 0.9122986197471619 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.9367936849594116, "epoch": 5.96, "learning_rate": 2.2452333990795528e-05, "loss": 0.8298, "step": 7049, "task_loss": 0.8969089388847351 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.1982269287109375, "epoch": 5.96, "learning_rate": 2.2447637832253218e-05, "loss": 0.9218, "step": 7050, "task_loss": 1.847790002822876 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5890570878982544, "epoch": 5.96, "learning_rate": 2.2442941673710904e-05, "loss": 0.5573, "step": 7051, "task_loss": 0.5556719303131104 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.41060635447502136, "epoch": 5.96, "learning_rate": 2.2438245515168594e-05, "loss": 0.8207, "step": 7052, "task_loss": 0.3406837582588196 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.935823380947113, "epoch": 5.96, "learning_rate": 2.243354935662628e-05, "loss": 0.8276, "step": 7053, "task_loss": 0.6874507665634155 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.297155499458313, "epoch": 5.96, "learning_rate": 2.2428853198083967e-05, "loss": 1.1128, "step": 7054, "task_loss": 1.421630620956421 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5736539959907532, "epoch": 5.96, "learning_rate": 2.2424157039541656e-05, "loss": 0.9507, "step": 7055, "task_loss": 0.4416114091873169 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.46343064308166504, "epoch": 5.96, "learning_rate": 2.2419460880999343e-05, "loss": 0.9748, "step": 7056, "task_loss": 0.12594757974147797 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.36130693554878235, "epoch": 5.96, "learning_rate": 2.2414764722457033e-05, "loss": 1.0106, "step": 7057, "task_loss": 0.5704684257507324 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7424352765083313, "epoch": 5.97, "learning_rate": 2.241006856391472e-05, "loss": 1.0201, "step": 7058, "task_loss": 1.4417438507080078 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7421086430549622, "epoch": 5.97, "learning_rate": 2.240537240537241e-05, "loss": 0.9251, "step": 7059, "task_loss": 0.9259775280952454 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5388233661651611, "epoch": 5.97, "learning_rate": 2.2400676246830095e-05, "loss": 0.7348, "step": 7060, "task_loss": 1.284247875213623 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.365987777709961, "epoch": 5.97, "learning_rate": 2.239598008828778e-05, "loss": 0.9939, "step": 7061, "task_loss": 1.0275120735168457 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.8863250613212585, "epoch": 5.97, "learning_rate": 2.2391283929745468e-05, "loss": 1.0017, "step": 7062, "task_loss": 0.7532990574836731 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.35293763875961304, "epoch": 5.97, "learning_rate": 2.2386587771203157e-05, "loss": 0.7771, "step": 7063, "task_loss": 0.5053237676620483 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7880846261978149, "epoch": 5.97, "learning_rate": 2.2381891612660844e-05, "loss": 0.6985, "step": 7064, "task_loss": 0.7875204086303711 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7210752964019775, "epoch": 5.97, "learning_rate": 2.2377195454118533e-05, "loss": 0.8243, "step": 7065, "task_loss": 1.0389220714569092 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7914570569992065, "epoch": 5.97, "learning_rate": 2.237249929557622e-05, "loss": 0.7726, "step": 7066, "task_loss": 0.4207613468170166 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7843008041381836, "epoch": 5.97, "learning_rate": 2.2367803137033906e-05, "loss": 0.7116, "step": 7067, "task_loss": 1.1446281671524048 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.0661332607269287, "epoch": 5.97, "learning_rate": 2.2363106978491592e-05, "loss": 0.7027, "step": 7068, "task_loss": 0.8612820506095886 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5790665149688721, "epoch": 5.97, "learning_rate": 2.2358410819949282e-05, "loss": 0.7596, "step": 7069, "task_loss": 0.9179668426513672 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6164804697036743, "epoch": 5.98, "learning_rate": 2.2353714661406972e-05, "loss": 0.9001, "step": 7070, "task_loss": 0.6243817806243896 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.1339552402496338, "epoch": 5.98, "learning_rate": 2.2349018502864658e-05, "loss": 1.1157, "step": 7071, "task_loss": 1.8071458339691162 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7230014204978943, "epoch": 5.98, "learning_rate": 2.2344322344322348e-05, "loss": 0.8089, "step": 7072, "task_loss": 1.2446247339248657 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.1915570497512817, "epoch": 5.98, "learning_rate": 2.2339626185780034e-05, "loss": 1.0401, "step": 7073, "task_loss": 0.9752329587936401 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6162565350532532, "epoch": 5.98, "learning_rate": 2.233493002723772e-05, "loss": 0.8273, "step": 7074, "task_loss": 0.37752223014831543 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6434027552604675, "epoch": 5.98, "learning_rate": 2.2330233868695407e-05, "loss": 0.9642, "step": 7075, "task_loss": 0.39203447103500366 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6678682565689087, "epoch": 5.98, "learning_rate": 2.2325537710153097e-05, "loss": 0.6932, "step": 7076, "task_loss": 0.5934561491012573 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.611844539642334, "epoch": 5.98, "learning_rate": 2.2320841551610783e-05, "loss": 0.8508, "step": 7077, "task_loss": 0.23391200602054596 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6665818095207214, "epoch": 5.98, "learning_rate": 2.2316145393068473e-05, "loss": 0.7439, "step": 7078, "task_loss": 0.30145263671875 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.9503761529922485, "epoch": 5.98, "learning_rate": 2.231144923452616e-05, "loss": 0.8294, "step": 7079, "task_loss": 2.1329708099365234 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5071483254432678, "epoch": 5.98, "learning_rate": 2.2306753075983845e-05, "loss": 0.7459, "step": 7080, "task_loss": 0.8000216484069824 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.1570560932159424, "epoch": 5.99, "learning_rate": 2.2302056917441532e-05, "loss": 1.1422, "step": 7081, "task_loss": 0.6803814172744751 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5520585775375366, "epoch": 5.99, "learning_rate": 2.229736075889922e-05, "loss": 0.8093, "step": 7082, "task_loss": 0.8136605620384216 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.8256561756134033, "epoch": 5.99, "learning_rate": 2.2292664600356908e-05, "loss": 1.0303, "step": 7083, "task_loss": 0.8956756591796875 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.8932708501815796, "epoch": 5.99, "learning_rate": 2.2287968441814598e-05, "loss": 1.0735, "step": 7084, "task_loss": 0.9514458179473877 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5048891305923462, "epoch": 5.99, "learning_rate": 2.2283272283272287e-05, "loss": 0.6773, "step": 7085, "task_loss": 1.1843836307525635 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7672263979911804, "epoch": 5.99, "learning_rate": 2.227857612472997e-05, "loss": 1.0116, "step": 7086, "task_loss": 0.047299567610025406 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.845800518989563, "epoch": 5.99, "learning_rate": 2.227387996618766e-05, "loss": 0.8056, "step": 7087, "task_loss": 0.9710116386413574 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5241060853004456, "epoch": 5.99, "learning_rate": 2.2269183807645346e-05, "loss": 0.6622, "step": 7088, "task_loss": 0.5125356912612915 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.3953444957733154, "epoch": 5.99, "learning_rate": 2.2264487649103036e-05, "loss": 0.8504, "step": 7089, "task_loss": 1.9851305484771729 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6369837522506714, "epoch": 5.99, "learning_rate": 2.2259791490560722e-05, "loss": 0.6087, "step": 7090, "task_loss": 0.2604276239871979 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7294655442237854, "epoch": 5.99, "learning_rate": 2.2255095332018412e-05, "loss": 0.7701, "step": 7091, "task_loss": 0.835907518863678 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6402999758720398, "epoch": 5.99, "learning_rate": 2.22503991734761e-05, "loss": 0.6667, "step": 7092, "task_loss": 1.5920175313949585 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7890723943710327, "epoch": 6.0, "learning_rate": 2.2245703014933785e-05, "loss": 0.948, "step": 7093, "task_loss": 0.46619632840156555 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.9958089590072632, "epoch": 6.0, "learning_rate": 2.224100685639147e-05, "loss": 0.7747, "step": 7094, "task_loss": 1.7218952178955078 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7369644045829773, "epoch": 6.0, "learning_rate": 2.223631069784916e-05, "loss": 1.0508, "step": 7095, "task_loss": 0.6656375527381897 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6014696955680847, "epoch": 6.0, "learning_rate": 2.2231614539306847e-05, "loss": 0.8388, "step": 7096, "task_loss": 1.1683316230773926 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5023189187049866, "epoch": 6.0, "learning_rate": 2.2226918380764537e-05, "loss": 0.8474, "step": 7097, "task_loss": 0.46635836362838745 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.3413389921188354, "epoch": 6.0, "learning_rate": 2.2222222222222223e-05, "loss": 1.1693, "step": 7098, "task_loss": 2.1103053092956543 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.38380709290504456, "epoch": 6.0, "learning_rate": 2.221752606367991e-05, "loss": 1.0348, "step": 7099, "task_loss": 0.45267486572265625 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.8650040626525879, "epoch": 6.0, "learning_rate": 2.22128299051376e-05, "loss": 0.7859, "step": 7100, "task_loss": 0.8456686735153198 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7220553159713745, "epoch": 6.0, "learning_rate": 2.2208133746595286e-05, "loss": 0.6328, "step": 7101, "task_loss": 0.88136225938797 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.42263901233673096, "epoch": 6.0, "learning_rate": 2.2203437588052975e-05, "loss": 0.7556, "step": 7102, "task_loss": 0.16944269835948944 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7597610950469971, "epoch": 6.0, "learning_rate": 2.2198741429510662e-05, "loss": 0.7617, "step": 7103, "task_loss": 0.6251004934310913 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7827243804931641, "epoch": 6.01, "learning_rate": 2.219404527096835e-05, "loss": 0.9542, "step": 7104, "task_loss": 0.7290997505187988 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6937300562858582, "epoch": 6.01, "learning_rate": 2.2189349112426034e-05, "loss": 0.753, "step": 7105, "task_loss": 0.5165429711341858 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5440090298652649, "epoch": 6.01, "learning_rate": 2.2184652953883724e-05, "loss": 0.69, "step": 7106, "task_loss": 0.9731672406196594 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.1508285999298096, "epoch": 6.01, "learning_rate": 2.217995679534141e-05, "loss": 0.8466, "step": 7107, "task_loss": 1.384342908859253 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.1506946086883545, "epoch": 6.01, "learning_rate": 2.21752606367991e-05, "loss": 0.7503, "step": 7108, "task_loss": 1.3345965147018433 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6869202852249146, "epoch": 6.01, "learning_rate": 2.2170564478256787e-05, "loss": 0.6771, "step": 7109, "task_loss": 0.7129122018814087 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.9743735790252686, "epoch": 6.01, "learning_rate": 2.2165868319714476e-05, "loss": 0.7487, "step": 7110, "task_loss": 0.19290515780448914 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6354200839996338, "epoch": 6.01, "learning_rate": 2.2161172161172163e-05, "loss": 1.0086, "step": 7111, "task_loss": 0.7354514002799988 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.529369056224823, "epoch": 6.01, "learning_rate": 2.215647600262985e-05, "loss": 0.8747, "step": 7112, "task_loss": 0.7682474851608276 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.9894292950630188, "epoch": 6.01, "learning_rate": 2.2151779844087535e-05, "loss": 0.877, "step": 7113, "task_loss": 1.9297856092453003 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5690426826477051, "epoch": 6.01, "learning_rate": 2.2147083685545225e-05, "loss": 0.5735, "step": 7114, "task_loss": 0.2082011103630066 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.9326813220977783, "epoch": 6.01, "learning_rate": 2.214238752700291e-05, "loss": 0.805, "step": 7115, "task_loss": 0.8731945753097534 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6090649962425232, "epoch": 6.02, "learning_rate": 2.21376913684606e-05, "loss": 0.8732, "step": 7116, "task_loss": 0.18333499133586884 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7906862497329712, "epoch": 6.02, "learning_rate": 2.2132995209918287e-05, "loss": 0.5838, "step": 7117, "task_loss": 0.6922846436500549 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5143712759017944, "epoch": 6.02, "learning_rate": 2.2128299051375974e-05, "loss": 0.8429, "step": 7118, "task_loss": 0.5707194209098816 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.0517569780349731, "epoch": 6.02, "learning_rate": 2.2123602892833664e-05, "loss": 0.7796, "step": 7119, "task_loss": 1.7226910591125488 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5328706502914429, "epoch": 6.02, "learning_rate": 2.211890673429135e-05, "loss": 0.7218, "step": 7120, "task_loss": 1.0554251670837402 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.9534875154495239, "epoch": 6.02, "learning_rate": 2.211421057574904e-05, "loss": 0.8273, "step": 7121, "task_loss": 0.7329445481300354 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7075216770172119, "epoch": 6.02, "learning_rate": 2.2109514417206726e-05, "loss": 0.7766, "step": 7122, "task_loss": 0.44506293535232544 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.9938985109329224, "epoch": 6.02, "learning_rate": 2.2104818258664416e-05, "loss": 0.8157, "step": 7123, "task_loss": 0.37984177470207214 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.8548861742019653, "epoch": 6.02, "learning_rate": 2.2100122100122102e-05, "loss": 0.8224, "step": 7124, "task_loss": 0.6260621547698975 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.015764594078064, "epoch": 6.02, "learning_rate": 2.209542594157979e-05, "loss": 0.8965, "step": 7125, "task_loss": 1.584453821182251 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.8498431444168091, "epoch": 6.02, "learning_rate": 2.2090729783037475e-05, "loss": 0.6671, "step": 7126, "task_loss": 1.1376436948776245 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.8616166710853577, "epoch": 6.02, "learning_rate": 2.2086033624495164e-05, "loss": 0.8606, "step": 7127, "task_loss": 0.3699338734149933 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.1592398881912231, "epoch": 6.03, "learning_rate": 2.208133746595285e-05, "loss": 0.9588, "step": 7128, "task_loss": 0.8832799196243286 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.620702862739563, "epoch": 6.03, "learning_rate": 2.207664130741054e-05, "loss": 0.9765, "step": 7129, "task_loss": 0.9461565017700195 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4700899124145508, "epoch": 6.03, "learning_rate": 2.2071945148868227e-05, "loss": 0.6986, "step": 7130, "task_loss": 0.6697307229042053 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.2398372888565063, "epoch": 6.03, "learning_rate": 2.2067248990325913e-05, "loss": 1.098, "step": 7131, "task_loss": 0.8978198766708374 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.8548671007156372, "epoch": 6.03, "learning_rate": 2.2062552831783603e-05, "loss": 0.6506, "step": 7132, "task_loss": 0.2794896960258484 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5915980339050293, "epoch": 6.03, "learning_rate": 2.205785667324129e-05, "loss": 0.9059, "step": 7133, "task_loss": 0.43156757950782776 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.9717321991920471, "epoch": 6.03, "learning_rate": 2.205316051469898e-05, "loss": 0.7042, "step": 7134, "task_loss": 0.6891641020774841 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.312893271446228, "epoch": 6.03, "learning_rate": 2.2048464356156665e-05, "loss": 0.8802, "step": 7135, "task_loss": 1.6514629125595093 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6723796129226685, "epoch": 6.03, "learning_rate": 2.2043768197614355e-05, "loss": 0.7898, "step": 7136, "task_loss": 0.09357137233018875 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.125716209411621, "epoch": 6.03, "learning_rate": 2.2039072039072038e-05, "loss": 0.9075, "step": 7137, "task_loss": 0.8880801200866699 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.8031906485557556, "epoch": 6.03, "learning_rate": 2.2034375880529728e-05, "loss": 0.8184, "step": 7138, "task_loss": 1.023810863494873 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.3047631084918976, "epoch": 6.03, "learning_rate": 2.2029679721987414e-05, "loss": 0.512, "step": 7139, "task_loss": 0.7965165972709656 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.1894936561584473, "epoch": 6.04, "learning_rate": 2.2024983563445104e-05, "loss": 0.822, "step": 7140, "task_loss": 0.5055736303329468 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7172307968139648, "epoch": 6.04, "learning_rate": 2.202028740490279e-05, "loss": 1.0584, "step": 7141, "task_loss": 0.575607419013977 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6257330179214478, "epoch": 6.04, "learning_rate": 2.201559124636048e-05, "loss": 0.8242, "step": 7142, "task_loss": 1.00485098361969 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.8556972742080688, "epoch": 6.04, "learning_rate": 2.2010895087818166e-05, "loss": 0.7311, "step": 7143, "task_loss": 0.41729289293289185 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5250390768051147, "epoch": 6.04, "learning_rate": 2.2006198929275853e-05, "loss": 0.6189, "step": 7144, "task_loss": 0.4488837718963623 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.8957421183586121, "epoch": 6.04, "learning_rate": 2.200150277073354e-05, "loss": 0.8399, "step": 7145, "task_loss": 1.7091084718704224 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7141124606132507, "epoch": 6.04, "learning_rate": 2.199680661219123e-05, "loss": 0.9786, "step": 7146, "task_loss": 0.5973165035247803 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4809889793395996, "epoch": 6.04, "learning_rate": 2.199211045364892e-05, "loss": 0.8487, "step": 7147, "task_loss": 0.7864792346954346 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.1007254123687744, "epoch": 6.04, "learning_rate": 2.1987414295106605e-05, "loss": 0.8665, "step": 7148, "task_loss": 1.084396481513977 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.1862244606018066, "epoch": 6.04, "learning_rate": 2.198271813656429e-05, "loss": 0.7627, "step": 7149, "task_loss": 0.9052022695541382 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.8797260522842407, "epoch": 6.04, "learning_rate": 2.1978021978021977e-05, "loss": 1.012, "step": 7150, "task_loss": 0.8473811745643616 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5255821943283081, "epoch": 6.04, "learning_rate": 2.1973325819479667e-05, "loss": 0.5598, "step": 7151, "task_loss": 0.6623478531837463 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7809185981750488, "epoch": 6.05, "learning_rate": 2.1968629660937353e-05, "loss": 0.6905, "step": 7152, "task_loss": 0.8511772155761719 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5125645995140076, "epoch": 6.05, "learning_rate": 2.1963933502395043e-05, "loss": 0.7509, "step": 7153, "task_loss": 0.4534519612789154 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.0601935386657715, "epoch": 6.05, "learning_rate": 2.195923734385273e-05, "loss": 0.896, "step": 7154, "task_loss": 0.8400830626487732 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.9168382883071899, "epoch": 6.05, "learning_rate": 2.195454118531042e-05, "loss": 0.8192, "step": 7155, "task_loss": 0.9539080262184143 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.9142945408821106, "epoch": 6.05, "learning_rate": 2.1949845026768106e-05, "loss": 0.9187, "step": 7156, "task_loss": 0.9168631434440613 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4489479660987854, "epoch": 6.05, "learning_rate": 2.1945148868225792e-05, "loss": 0.8247, "step": 7157, "task_loss": 0.18643520772457123 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5891568660736084, "epoch": 6.05, "learning_rate": 2.1940452709683478e-05, "loss": 0.7695, "step": 7158, "task_loss": 0.2961452007293701 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5328854322433472, "epoch": 6.05, "learning_rate": 2.1935756551141168e-05, "loss": 0.6078, "step": 7159, "task_loss": 0.37295442819595337 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7369132041931152, "epoch": 6.05, "learning_rate": 2.1931060392598854e-05, "loss": 0.6495, "step": 7160, "task_loss": 0.8477520942687988 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5799646377563477, "epoch": 6.05, "learning_rate": 2.1926364234056544e-05, "loss": 0.8054, "step": 7161, "task_loss": 1.1240569353103638 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5039221048355103, "epoch": 6.05, "learning_rate": 2.192166807551423e-05, "loss": 0.6328, "step": 7162, "task_loss": 0.596105694770813 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.9567481875419617, "epoch": 6.05, "learning_rate": 2.1916971916971917e-05, "loss": 0.655, "step": 7163, "task_loss": 0.6072888374328613 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.606728196144104, "epoch": 6.06, "learning_rate": 2.1912275758429606e-05, "loss": 0.5885, "step": 7164, "task_loss": 0.3200775980949402 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.734768271446228, "epoch": 6.06, "learning_rate": 2.1907579599887293e-05, "loss": 0.7925, "step": 7165, "task_loss": 0.40596508979797363 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4207037091255188, "epoch": 6.06, "learning_rate": 2.1902883441344982e-05, "loss": 0.6016, "step": 7166, "task_loss": 0.49969935417175293 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.3173852264881134, "epoch": 6.06, "learning_rate": 2.189818728280267e-05, "loss": 0.5807, "step": 7167, "task_loss": 0.3355173170566559 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4658110439777374, "epoch": 6.06, "learning_rate": 2.189349112426036e-05, "loss": 0.7398, "step": 7168, "task_loss": 0.16004134714603424 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7698280811309814, "epoch": 6.06, "learning_rate": 2.188879496571804e-05, "loss": 0.6663, "step": 7169, "task_loss": 0.9086529016494751 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6550474762916565, "epoch": 6.06, "learning_rate": 2.188409880717573e-05, "loss": 0.5423, "step": 7170, "task_loss": 0.6860083937644958 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.8589059114456177, "epoch": 6.06, "learning_rate": 2.1879402648633418e-05, "loss": 0.6948, "step": 7171, "task_loss": 1.3490904569625854 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7704837322235107, "epoch": 6.06, "learning_rate": 2.1874706490091107e-05, "loss": 0.8034, "step": 7172, "task_loss": 0.2552017867565155 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6756435632705688, "epoch": 6.06, "learning_rate": 2.1870010331548794e-05, "loss": 0.906, "step": 7173, "task_loss": 1.29674232006073 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.9244197607040405, "epoch": 6.06, "learning_rate": 2.1865314173006483e-05, "loss": 0.9253, "step": 7174, "task_loss": 1.3732184171676636 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.874692440032959, "epoch": 6.07, "learning_rate": 2.186061801446417e-05, "loss": 0.6775, "step": 7175, "task_loss": 0.6197335720062256 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6820969581604004, "epoch": 6.07, "learning_rate": 2.1855921855921856e-05, "loss": 0.7291, "step": 7176, "task_loss": 0.25539979338645935 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.49988046288490295, "epoch": 6.07, "learning_rate": 2.1851225697379546e-05, "loss": 0.6907, "step": 7177, "task_loss": 0.15416975319385529 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4537976384162903, "epoch": 6.07, "learning_rate": 2.1846529538837232e-05, "loss": 0.7524, "step": 7178, "task_loss": 0.7771729230880737 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5168384909629822, "epoch": 6.07, "learning_rate": 2.1841833380294922e-05, "loss": 0.8483, "step": 7179, "task_loss": 0.8032326698303223 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.1561343669891357, "epoch": 6.07, "learning_rate": 2.1837137221752608e-05, "loss": 0.7695, "step": 7180, "task_loss": 1.2025516033172607 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6765623092651367, "epoch": 6.07, "learning_rate": 2.1832441063210295e-05, "loss": 0.8174, "step": 7181, "task_loss": 0.9995985627174377 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.8926937580108643, "epoch": 6.07, "learning_rate": 2.182774490466798e-05, "loss": 1.0419, "step": 7182, "task_loss": 1.49722421169281 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.47439974546432495, "epoch": 6.07, "learning_rate": 2.182304874612567e-05, "loss": 0.7517, "step": 7183, "task_loss": 0.17238035798072815 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5642910599708557, "epoch": 6.07, "learning_rate": 2.1818352587583357e-05, "loss": 0.8247, "step": 7184, "task_loss": 0.053178928792476654 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.8397172093391418, "epoch": 6.07, "learning_rate": 2.1813656429041047e-05, "loss": 0.7845, "step": 7185, "task_loss": 0.30938243865966797 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.9758416414260864, "epoch": 6.07, "learning_rate": 2.1808960270498733e-05, "loss": 0.8254, "step": 7186, "task_loss": 1.5168362855911255 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.49982646107673645, "epoch": 6.08, "learning_rate": 2.1804264111956423e-05, "loss": 0.7394, "step": 7187, "task_loss": 0.7977765798568726 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.1147339344024658, "epoch": 6.08, "learning_rate": 2.1799567953414106e-05, "loss": 0.7209, "step": 7188, "task_loss": 1.8341929912567139 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6717292666435242, "epoch": 6.08, "learning_rate": 2.1794871794871795e-05, "loss": 0.6333, "step": 7189, "task_loss": 1.048740029335022 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7996200323104858, "epoch": 6.08, "learning_rate": 2.1790175636329482e-05, "loss": 0.7675, "step": 7190, "task_loss": 0.9711360335350037 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.670913577079773, "epoch": 6.08, "learning_rate": 2.178547947778717e-05, "loss": 0.5661, "step": 7191, "task_loss": 0.7828474044799805 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.0462335348129272, "epoch": 6.08, "learning_rate": 2.1780783319244858e-05, "loss": 0.8979, "step": 7192, "task_loss": 1.197798728942871 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.75876384973526, "epoch": 6.08, "learning_rate": 2.1776087160702548e-05, "loss": 0.8777, "step": 7193, "task_loss": 0.5137035846710205 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7453130483627319, "epoch": 6.08, "learning_rate": 2.1771391002160234e-05, "loss": 0.7199, "step": 7194, "task_loss": 0.4225083589553833 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.9166837930679321, "epoch": 6.08, "learning_rate": 2.176669484361792e-05, "loss": 0.6073, "step": 7195, "task_loss": 0.5939455032348633 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6556491851806641, "epoch": 6.08, "learning_rate": 2.176199868507561e-05, "loss": 0.8144, "step": 7196, "task_loss": 0.9048523902893066 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.664811372756958, "epoch": 6.08, "learning_rate": 2.1757302526533296e-05, "loss": 0.7984, "step": 7197, "task_loss": 0.41085103154182434 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4912843704223633, "epoch": 6.08, "learning_rate": 2.1752606367990986e-05, "loss": 0.8464, "step": 7198, "task_loss": 0.7120546698570251 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4801912307739258, "epoch": 6.09, "learning_rate": 2.1747910209448672e-05, "loss": 0.4609, "step": 7199, "task_loss": 0.8117266893386841 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.3592599928379059, "epoch": 6.09, "learning_rate": 2.174321405090636e-05, "loss": 0.497, "step": 7200, "task_loss": 0.3013739287853241 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5847285389900208, "epoch": 6.09, "learning_rate": 2.1738517892364045e-05, "loss": 0.833, "step": 7201, "task_loss": 0.6599087715148926 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.8118599653244019, "epoch": 6.09, "learning_rate": 2.1733821733821735e-05, "loss": 0.8178, "step": 7202, "task_loss": 1.083342432975769 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4671546518802643, "epoch": 6.09, "learning_rate": 2.172912557527942e-05, "loss": 0.5851, "step": 7203, "task_loss": 0.8996623158454895 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5797631144523621, "epoch": 6.09, "learning_rate": 2.172442941673711e-05, "loss": 1.0265, "step": 7204, "task_loss": 1.4018033742904663 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.47824281454086304, "epoch": 6.09, "learning_rate": 2.1719733258194797e-05, "loss": 0.8023, "step": 7205, "task_loss": 0.35927483439445496 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.952175498008728, "epoch": 6.09, "learning_rate": 2.1715037099652487e-05, "loss": 0.8639, "step": 7206, "task_loss": 1.1770477294921875 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.8245330452919006, "epoch": 6.09, "learning_rate": 2.1710340941110173e-05, "loss": 0.8024, "step": 7207, "task_loss": 0.36155787110328674 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.49265801906585693, "epoch": 6.09, "learning_rate": 2.170564478256786e-05, "loss": 0.7825, "step": 7208, "task_loss": 0.8571830987930298 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6904418468475342, "epoch": 6.09, "learning_rate": 2.170094862402555e-05, "loss": 0.8175, "step": 7209, "task_loss": 0.6895142793655396 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6702935695648193, "epoch": 6.09, "learning_rate": 2.1696252465483236e-05, "loss": 0.7629, "step": 7210, "task_loss": 0.7683337330818176 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.4230085611343384, "epoch": 6.1, "learning_rate": 2.1691556306940925e-05, "loss": 0.8864, "step": 7211, "task_loss": 2.0329971313476562 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5686674118041992, "epoch": 6.1, "learning_rate": 2.1686860148398612e-05, "loss": 0.586, "step": 7212, "task_loss": 0.7162639498710632 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.0078449249267578, "epoch": 6.1, "learning_rate": 2.1682163989856298e-05, "loss": 0.6551, "step": 7213, "task_loss": 0.7790558934211731 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.9040428400039673, "epoch": 6.1, "learning_rate": 2.1677467831313984e-05, "loss": 0.892, "step": 7214, "task_loss": 1.0883476734161377 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.9948602914810181, "epoch": 6.1, "learning_rate": 2.1672771672771674e-05, "loss": 0.8149, "step": 7215, "task_loss": 1.6493476629257202 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.8038848638534546, "epoch": 6.1, "learning_rate": 2.166807551422936e-05, "loss": 0.8258, "step": 7216, "task_loss": 1.3868155479431152 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.1387102603912354, "epoch": 6.1, "learning_rate": 2.166337935568705e-05, "loss": 0.9815, "step": 7217, "task_loss": 1.17807936668396 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.9181540012359619, "epoch": 6.1, "learning_rate": 2.1658683197144737e-05, "loss": 0.6725, "step": 7218, "task_loss": 1.9234899282455444 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7684754133224487, "epoch": 6.1, "learning_rate": 2.1653987038602426e-05, "loss": 0.7153, "step": 7219, "task_loss": 0.529316246509552 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5157217979431152, "epoch": 6.1, "learning_rate": 2.164929088006011e-05, "loss": 0.628, "step": 7220, "task_loss": 0.08424042910337448 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.171029806137085, "epoch": 6.1, "learning_rate": 2.16445947215178e-05, "loss": 0.6756, "step": 7221, "task_loss": 0.7613431811332703 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.8572132587432861, "epoch": 6.1, "learning_rate": 2.1639898562975485e-05, "loss": 0.74, "step": 7222, "task_loss": 0.8983513116836548 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6805492639541626, "epoch": 6.11, "learning_rate": 2.1635202404433175e-05, "loss": 0.9395, "step": 7223, "task_loss": 0.6547703742980957 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.8042693138122559, "epoch": 6.11, "learning_rate": 2.1630506245890865e-05, "loss": 0.7344, "step": 7224, "task_loss": 1.053215503692627 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7065130472183228, "epoch": 6.11, "learning_rate": 2.162581008734855e-05, "loss": 0.8014, "step": 7225, "task_loss": 1.6923737525939941 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6202841401100159, "epoch": 6.11, "learning_rate": 2.1621113928806237e-05, "loss": 0.887, "step": 7226, "task_loss": 0.6131212711334229 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4195966124534607, "epoch": 6.11, "learning_rate": 2.1616417770263924e-05, "loss": 0.7064, "step": 7227, "task_loss": 0.61259526014328 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.8245047926902771, "epoch": 6.11, "learning_rate": 2.1611721611721613e-05, "loss": 0.8428, "step": 7228, "task_loss": 0.6946379542350769 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.3024053573608398, "epoch": 6.11, "learning_rate": 2.16070254531793e-05, "loss": 0.7953, "step": 7229, "task_loss": 1.2562988996505737 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7147060632705688, "epoch": 6.11, "learning_rate": 2.160232929463699e-05, "loss": 0.8503, "step": 7230, "task_loss": 0.7056276202201843 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.0790929794311523, "epoch": 6.11, "learning_rate": 2.1597633136094676e-05, "loss": 0.7848, "step": 7231, "task_loss": 0.887088418006897 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5855640172958374, "epoch": 6.11, "learning_rate": 2.1592936977552362e-05, "loss": 0.614, "step": 7232, "task_loss": 0.3032853901386261 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7523081302642822, "epoch": 6.11, "learning_rate": 2.158824081901005e-05, "loss": 0.7284, "step": 7233, "task_loss": 0.7506266832351685 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5378092527389526, "epoch": 6.11, "learning_rate": 2.1583544660467738e-05, "loss": 0.8681, "step": 7234, "task_loss": 0.697575569152832 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5744731426239014, "epoch": 6.12, "learning_rate": 2.1578848501925425e-05, "loss": 0.8718, "step": 7235, "task_loss": 0.5974279642105103 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6487067341804504, "epoch": 6.12, "learning_rate": 2.1574152343383114e-05, "loss": 0.6438, "step": 7236, "task_loss": 0.3642679452896118 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6631360054016113, "epoch": 6.12, "learning_rate": 2.15694561848408e-05, "loss": 0.8855, "step": 7237, "task_loss": 1.20645272731781 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6382851004600525, "epoch": 6.12, "learning_rate": 2.156476002629849e-05, "loss": 0.5294, "step": 7238, "task_loss": 0.5730342864990234 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6085051894187927, "epoch": 6.12, "learning_rate": 2.1560063867756177e-05, "loss": 0.6526, "step": 7239, "task_loss": 1.1035147905349731 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.0517230033874512, "epoch": 6.12, "learning_rate": 2.1555367709213863e-05, "loss": 0.7708, "step": 7240, "task_loss": 0.6915375590324402 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7854740619659424, "epoch": 6.12, "learning_rate": 2.1550671550671553e-05, "loss": 0.9201, "step": 7241, "task_loss": 0.6550876498222351 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.8438715934753418, "epoch": 6.12, "learning_rate": 2.154597539212924e-05, "loss": 0.9139, "step": 7242, "task_loss": 1.0264166593551636 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.3967197835445404, "epoch": 6.12, "learning_rate": 2.154127923358693e-05, "loss": 0.69, "step": 7243, "task_loss": 0.1629209965467453 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7615933418273926, "epoch": 6.12, "learning_rate": 2.1536583075044615e-05, "loss": 0.7217, "step": 7244, "task_loss": 1.0993856191635132 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.8833132386207581, "epoch": 6.12, "learning_rate": 2.15318869165023e-05, "loss": 0.7274, "step": 7245, "task_loss": 0.7913080453872681 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7881261706352234, "epoch": 6.13, "learning_rate": 2.1527190757959988e-05, "loss": 0.8763, "step": 7246, "task_loss": 1.7215936183929443 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6141406297683716, "epoch": 6.13, "learning_rate": 2.1522494599417678e-05, "loss": 0.688, "step": 7247, "task_loss": 1.0577762126922607 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7764167189598083, "epoch": 6.13, "learning_rate": 2.1517798440875364e-05, "loss": 0.7022, "step": 7248, "task_loss": 1.0579684972763062 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.828714907169342, "epoch": 6.13, "learning_rate": 2.1513102282333054e-05, "loss": 0.5989, "step": 7249, "task_loss": 0.44812169671058655 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.174026370048523, "epoch": 6.13, "learning_rate": 2.150840612379074e-05, "loss": 0.8788, "step": 7250, "task_loss": 0.8426690697669983 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7745528221130371, "epoch": 6.13, "learning_rate": 2.1503709965248426e-05, "loss": 0.8316, "step": 7251, "task_loss": 1.0224412679672241 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.021286964416504, "epoch": 6.13, "learning_rate": 2.1499013806706113e-05, "loss": 0.9524, "step": 7252, "task_loss": 1.03788161277771 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6251346468925476, "epoch": 6.13, "learning_rate": 2.1494317648163802e-05, "loss": 0.7201, "step": 7253, "task_loss": 0.6775016188621521 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.1667661666870117, "epoch": 6.13, "learning_rate": 2.1489621489621492e-05, "loss": 0.7936, "step": 7254, "task_loss": 0.571725606918335 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.49303075671195984, "epoch": 6.13, "learning_rate": 2.148492533107918e-05, "loss": 0.7557, "step": 7255, "task_loss": 1.052638292312622 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4516313076019287, "epoch": 6.13, "learning_rate": 2.1480229172536868e-05, "loss": 0.7646, "step": 7256, "task_loss": 0.3792729377746582 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.3801559209823608, "epoch": 6.13, "learning_rate": 2.1475533013994555e-05, "loss": 0.6164, "step": 7257, "task_loss": 0.9194687604904175 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7355378866195679, "epoch": 6.14, "learning_rate": 2.147083685545224e-05, "loss": 0.8276, "step": 7258, "task_loss": 1.2954992055892944 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6039822101593018, "epoch": 6.14, "learning_rate": 2.1466140696909927e-05, "loss": 0.8629, "step": 7259, "task_loss": 0.857092559337616 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.1318070888519287, "epoch": 6.14, "learning_rate": 2.1461444538367617e-05, "loss": 0.8385, "step": 7260, "task_loss": 1.4391353130340576 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.0582722425460815, "epoch": 6.14, "learning_rate": 2.1456748379825303e-05, "loss": 0.7809, "step": 7261, "task_loss": 0.7872764468193054 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5530414581298828, "epoch": 6.14, "learning_rate": 2.1452052221282993e-05, "loss": 0.6073, "step": 7262, "task_loss": 0.10894644260406494 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.791254997253418, "epoch": 6.14, "learning_rate": 2.144735606274068e-05, "loss": 0.7945, "step": 7263, "task_loss": 1.045981764793396 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.3289176821708679, "epoch": 6.14, "learning_rate": 2.1442659904198366e-05, "loss": 0.6833, "step": 7264, "task_loss": 0.9170638918876648 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6668128371238708, "epoch": 6.14, "learning_rate": 2.1437963745656052e-05, "loss": 0.6903, "step": 7265, "task_loss": 1.2841181755065918 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6977182626724243, "epoch": 6.14, "learning_rate": 2.1433267587113742e-05, "loss": 0.8821, "step": 7266, "task_loss": 1.0069397687911987 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.641633152961731, "epoch": 6.14, "learning_rate": 2.1428571428571428e-05, "loss": 0.5638, "step": 7267, "task_loss": 1.2753231525421143 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.42851322889328003, "epoch": 6.14, "learning_rate": 2.1423875270029118e-05, "loss": 0.5608, "step": 7268, "task_loss": 1.023807406425476 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.8755781650543213, "epoch": 6.14, "learning_rate": 2.1419179111486804e-05, "loss": 0.7999, "step": 7269, "task_loss": 0.3968278765678406 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4234994947910309, "epoch": 6.15, "learning_rate": 2.1414482952944494e-05, "loss": 0.9036, "step": 7270, "task_loss": 0.09756513684988022 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.3662344217300415, "epoch": 6.15, "learning_rate": 2.140978679440218e-05, "loss": 1.0458, "step": 7271, "task_loss": 0.9560425281524658 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.9812257289886475, "epoch": 6.15, "learning_rate": 2.1405090635859867e-05, "loss": 0.7054, "step": 7272, "task_loss": 0.7594797611236572 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6088255643844604, "epoch": 6.15, "learning_rate": 2.1400394477317556e-05, "loss": 0.6196, "step": 7273, "task_loss": 0.28826162219047546 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.0574575662612915, "epoch": 6.15, "learning_rate": 2.1395698318775243e-05, "loss": 1.054, "step": 7274, "task_loss": 1.1559851169586182 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.5081233978271484, "epoch": 6.15, "learning_rate": 2.1391002160232932e-05, "loss": 1.0462, "step": 7275, "task_loss": 2.451568126678467 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7172419428825378, "epoch": 6.15, "learning_rate": 2.138630600169062e-05, "loss": 0.6038, "step": 7276, "task_loss": 1.4592489004135132 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.8442765474319458, "epoch": 6.15, "learning_rate": 2.1381609843148305e-05, "loss": 0.6626, "step": 7277, "task_loss": 0.7386364340782166 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6793843507766724, "epoch": 6.15, "learning_rate": 2.137691368460599e-05, "loss": 0.5958, "step": 7278, "task_loss": 0.13768985867500305 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5622518062591553, "epoch": 6.15, "learning_rate": 2.137221752606368e-05, "loss": 0.7642, "step": 7279, "task_loss": 0.46353909373283386 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.8462156057357788, "epoch": 6.15, "learning_rate": 2.1367521367521368e-05, "loss": 0.7459, "step": 7280, "task_loss": 0.8773670196533203 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.98182213306427, "epoch": 6.15, "learning_rate": 2.1362825208979057e-05, "loss": 0.8802, "step": 7281, "task_loss": 0.34256017208099365 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6443221569061279, "epoch": 6.16, "learning_rate": 2.1358129050436744e-05, "loss": 0.6565, "step": 7282, "task_loss": 0.39825159311294556 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6809701919555664, "epoch": 6.16, "learning_rate": 2.135343289189443e-05, "loss": 0.7691, "step": 7283, "task_loss": 0.3650287091732025 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4708264172077179, "epoch": 6.16, "learning_rate": 2.1348736733352116e-05, "loss": 0.6392, "step": 7284, "task_loss": 0.18303744494915009 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4702027142047882, "epoch": 6.16, "learning_rate": 2.1344040574809806e-05, "loss": 0.6071, "step": 7285, "task_loss": 0.8118699789047241 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.8072978258132935, "epoch": 6.16, "learning_rate": 2.1339344416267496e-05, "loss": 0.8333, "step": 7286, "task_loss": 0.43348655104637146 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.212141990661621, "epoch": 6.16, "learning_rate": 2.1334648257725182e-05, "loss": 0.8561, "step": 7287, "task_loss": 0.8420369625091553 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.653691291809082, "epoch": 6.16, "learning_rate": 2.1329952099182872e-05, "loss": 0.7037, "step": 7288, "task_loss": 0.6111598610877991 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.2954797744750977, "epoch": 6.16, "learning_rate": 2.1325255940640558e-05, "loss": 1.0747, "step": 7289, "task_loss": 1.3628555536270142 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5825004577636719, "epoch": 6.16, "learning_rate": 2.1320559782098244e-05, "loss": 0.582, "step": 7290, "task_loss": 0.1854131817817688 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.414575457572937, "epoch": 6.16, "learning_rate": 2.131586362355593e-05, "loss": 1.1293, "step": 7291, "task_loss": 1.0222036838531494 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.518298864364624, "epoch": 6.16, "learning_rate": 2.131116746501362e-05, "loss": 0.8891, "step": 7292, "task_loss": 0.4502016305923462 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7490759491920471, "epoch": 6.16, "learning_rate": 2.1306471306471307e-05, "loss": 0.7214, "step": 7293, "task_loss": 1.113375186920166 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6239113807678223, "epoch": 6.17, "learning_rate": 2.1301775147928997e-05, "loss": 0.6411, "step": 7294, "task_loss": 0.47567692399024963 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.8071575164794922, "epoch": 6.17, "learning_rate": 2.1297078989386683e-05, "loss": 0.6916, "step": 7295, "task_loss": 0.6421604156494141 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.1491310596466064, "epoch": 6.17, "learning_rate": 2.129238283084437e-05, "loss": 0.769, "step": 7296, "task_loss": 1.2147761583328247 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.048752784729004, "epoch": 6.17, "learning_rate": 2.1287686672302056e-05, "loss": 0.6648, "step": 7297, "task_loss": 0.7053056955337524 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6138238906860352, "epoch": 6.17, "learning_rate": 2.1282990513759745e-05, "loss": 0.665, "step": 7298, "task_loss": 0.7881291508674622 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7109013795852661, "epoch": 6.17, "learning_rate": 2.127829435521743e-05, "loss": 0.5564, "step": 7299, "task_loss": 0.5207294821739197 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.9928478002548218, "epoch": 6.17, "learning_rate": 2.127359819667512e-05, "loss": 0.7403, "step": 7300, "task_loss": 0.6073621511459351 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.7343764305114746, "epoch": 6.17, "learning_rate": 2.126890203813281e-05, "loss": 0.972, "step": 7301, "task_loss": 1.8446147441864014 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.29834824800491333, "epoch": 6.17, "learning_rate": 2.1264205879590497e-05, "loss": 0.6227, "step": 7302, "task_loss": 0.30995550751686096 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.613131046295166, "epoch": 6.17, "learning_rate": 2.1259509721048184e-05, "loss": 0.841, "step": 7303, "task_loss": 0.675408661365509 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4670330286026001, "epoch": 6.17, "learning_rate": 2.125481356250587e-05, "loss": 0.5726, "step": 7304, "task_loss": 0.10194724053144455 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.0692964792251587, "epoch": 6.17, "learning_rate": 2.125011740396356e-05, "loss": 1.0102, "step": 7305, "task_loss": 2.002070188522339 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5963374972343445, "epoch": 6.18, "learning_rate": 2.1245421245421246e-05, "loss": 0.7314, "step": 7306, "task_loss": 0.11792619526386261 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6287658214569092, "epoch": 6.18, "learning_rate": 2.1240725086878936e-05, "loss": 0.746, "step": 7307, "task_loss": 0.8577568531036377 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.923244833946228, "epoch": 6.18, "learning_rate": 2.1236028928336622e-05, "loss": 0.8022, "step": 7308, "task_loss": 1.819543719291687 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.8460518717765808, "epoch": 6.18, "learning_rate": 2.123133276979431e-05, "loss": 0.8748, "step": 7309, "task_loss": 1.0899124145507812 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6069716811180115, "epoch": 6.18, "learning_rate": 2.1226636611251995e-05, "loss": 0.7937, "step": 7310, "task_loss": 0.11758802086114883 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.9613973498344421, "epoch": 6.18, "learning_rate": 2.1221940452709685e-05, "loss": 0.993, "step": 7311, "task_loss": 0.9018083810806274 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6727063655853271, "epoch": 6.18, "learning_rate": 2.121724429416737e-05, "loss": 0.8886, "step": 7312, "task_loss": 0.650562584400177 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6873434782028198, "epoch": 6.18, "learning_rate": 2.121254813562506e-05, "loss": 0.8502, "step": 7313, "task_loss": 0.6804630160331726 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.502631425857544, "epoch": 6.18, "learning_rate": 2.1207851977082747e-05, "loss": 0.6542, "step": 7314, "task_loss": 0.3972668945789337 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.0118098258972168, "epoch": 6.18, "learning_rate": 2.1203155818540433e-05, "loss": 0.7148, "step": 7315, "task_loss": 1.3436074256896973 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7391782402992249, "epoch": 6.18, "learning_rate": 2.1198459659998123e-05, "loss": 0.7943, "step": 7316, "task_loss": 0.8741737604141235 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5714748501777649, "epoch": 6.19, "learning_rate": 2.119376350145581e-05, "loss": 0.6647, "step": 7317, "task_loss": 0.6296858787536621 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7975035905838013, "epoch": 6.19, "learning_rate": 2.11890673429135e-05, "loss": 0.8271, "step": 7318, "task_loss": 1.2818903923034668 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.0133475065231323, "epoch": 6.19, "learning_rate": 2.1184371184371186e-05, "loss": 0.7281, "step": 7319, "task_loss": 0.6959531307220459 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.8711767196655273, "epoch": 6.19, "learning_rate": 2.1179675025828875e-05, "loss": 0.7315, "step": 7320, "task_loss": 1.5789134502410889 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.8759069442749023, "epoch": 6.19, "learning_rate": 2.117497886728656e-05, "loss": 1.0333, "step": 7321, "task_loss": 1.6512410640716553 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.9129708409309387, "epoch": 6.19, "learning_rate": 2.1170282708744248e-05, "loss": 0.843, "step": 7322, "task_loss": 1.2214851379394531 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5919798612594604, "epoch": 6.19, "learning_rate": 2.1165586550201934e-05, "loss": 0.67, "step": 7323, "task_loss": 0.5314354300498962 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.8062951564788818, "epoch": 6.19, "learning_rate": 2.1160890391659624e-05, "loss": 0.6456, "step": 7324, "task_loss": 0.32059571146965027 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.773934006690979, "epoch": 6.19, "learning_rate": 2.115619423311731e-05, "loss": 0.8783, "step": 7325, "task_loss": 0.9014705419540405 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5764439105987549, "epoch": 6.19, "learning_rate": 2.1151498074575e-05, "loss": 0.6263, "step": 7326, "task_loss": 0.36131203174591064 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.39726966619491577, "epoch": 6.19, "learning_rate": 2.1146801916032686e-05, "loss": 0.6354, "step": 7327, "task_loss": 0.2478232979774475 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5562845468521118, "epoch": 6.19, "learning_rate": 2.1142105757490373e-05, "loss": 0.7944, "step": 7328, "task_loss": 0.9592318534851074 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.794558048248291, "epoch": 6.2, "learning_rate": 2.113740959894806e-05, "loss": 0.8461, "step": 7329, "task_loss": 0.22421178221702576 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7193570137023926, "epoch": 6.2, "learning_rate": 2.113271344040575e-05, "loss": 0.6712, "step": 7330, "task_loss": 0.3663038909435272 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5827070474624634, "epoch": 6.2, "learning_rate": 2.112801728186344e-05, "loss": 0.8942, "step": 7331, "task_loss": 1.3067294359207153 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7363001704216003, "epoch": 6.2, "learning_rate": 2.1123321123321125e-05, "loss": 0.7152, "step": 7332, "task_loss": 0.47304561734199524 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.228287696838379, "epoch": 6.2, "learning_rate": 2.1118624964778815e-05, "loss": 0.8615, "step": 7333, "task_loss": 1.7882959842681885 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4802640676498413, "epoch": 6.2, "learning_rate": 2.1113928806236498e-05, "loss": 0.6511, "step": 7334, "task_loss": 0.8797512650489807 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.42833617329597473, "epoch": 6.2, "learning_rate": 2.1109232647694187e-05, "loss": 0.7442, "step": 7335, "task_loss": 0.3300752341747284 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6832612156867981, "epoch": 6.2, "learning_rate": 2.1104536489151874e-05, "loss": 0.6556, "step": 7336, "task_loss": 0.6410014629364014 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.0272929668426514, "epoch": 6.2, "learning_rate": 2.1099840330609563e-05, "loss": 0.7582, "step": 7337, "task_loss": 1.223035216331482 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.598798394203186, "epoch": 6.2, "learning_rate": 2.109514417206725e-05, "loss": 0.8003, "step": 7338, "task_loss": 0.9853284358978271 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7869447469711304, "epoch": 6.2, "learning_rate": 2.109044801352494e-05, "loss": 0.5525, "step": 7339, "task_loss": 0.3112577199935913 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.830415666103363, "epoch": 6.2, "learning_rate": 2.1085751854982626e-05, "loss": 0.9618, "step": 7340, "task_loss": 0.8471307754516602 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.8166911005973816, "epoch": 6.21, "learning_rate": 2.1081055696440312e-05, "loss": 0.7667, "step": 7341, "task_loss": 1.1074939966201782 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5692092180252075, "epoch": 6.21, "learning_rate": 2.1076359537898e-05, "loss": 0.8185, "step": 7342, "task_loss": 0.8674144744873047 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.8530000448226929, "epoch": 6.21, "learning_rate": 2.1071663379355688e-05, "loss": 0.9748, "step": 7343, "task_loss": 0.7794963121414185 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6765753626823425, "epoch": 6.21, "learning_rate": 2.1066967220813375e-05, "loss": 0.6717, "step": 7344, "task_loss": 0.48213571310043335 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7187105417251587, "epoch": 6.21, "learning_rate": 2.1062271062271064e-05, "loss": 0.6571, "step": 7345, "task_loss": 0.33861497044563293 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.30975550413131714, "epoch": 6.21, "learning_rate": 2.105757490372875e-05, "loss": 0.7254, "step": 7346, "task_loss": 0.9519387483596802 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.1694822311401367, "epoch": 6.21, "learning_rate": 2.1052878745186437e-05, "loss": 0.8362, "step": 7347, "task_loss": 1.2636828422546387 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5459661483764648, "epoch": 6.21, "learning_rate": 2.1048182586644127e-05, "loss": 0.7631, "step": 7348, "task_loss": 0.5094197988510132 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.1794116497039795, "epoch": 6.21, "learning_rate": 2.1043486428101813e-05, "loss": 0.8282, "step": 7349, "task_loss": 2.7132728099823 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5219706296920776, "epoch": 6.21, "learning_rate": 2.1038790269559503e-05, "loss": 0.8697, "step": 7350, "task_loss": 0.4655725955963135 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.8996977806091309, "epoch": 6.21, "learning_rate": 2.103409411101719e-05, "loss": 0.7549, "step": 7351, "task_loss": 0.7882678508758545 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.8599838614463806, "epoch": 6.21, "learning_rate": 2.102939795247488e-05, "loss": 0.7523, "step": 7352, "task_loss": 0.8759251832962036 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5591468811035156, "epoch": 6.22, "learning_rate": 2.1024701793932565e-05, "loss": 0.7208, "step": 7353, "task_loss": 1.0729572772979736 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6783599853515625, "epoch": 6.22, "learning_rate": 2.102000563539025e-05, "loss": 0.7947, "step": 7354, "task_loss": 1.139552354812622 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6930433511734009, "epoch": 6.22, "learning_rate": 2.1015309476847938e-05, "loss": 0.7187, "step": 7355, "task_loss": 0.4436817169189453 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.683974027633667, "epoch": 6.22, "learning_rate": 2.1010613318305628e-05, "loss": 0.6961, "step": 7356, "task_loss": 1.223222255706787 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.758063793182373, "epoch": 6.22, "learning_rate": 2.1005917159763314e-05, "loss": 0.8119, "step": 7357, "task_loss": 0.8501352667808533 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5772619843482971, "epoch": 6.22, "learning_rate": 2.1001221001221004e-05, "loss": 0.8086, "step": 7358, "task_loss": 0.38559019565582275 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6232973337173462, "epoch": 6.22, "learning_rate": 2.099652484267869e-05, "loss": 0.5629, "step": 7359, "task_loss": 0.7940839529037476 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.870672881603241, "epoch": 6.22, "learning_rate": 2.0991828684136376e-05, "loss": 0.6846, "step": 7360, "task_loss": 0.8545135259628296 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7082021832466125, "epoch": 6.22, "learning_rate": 2.0987132525594063e-05, "loss": 0.7674, "step": 7361, "task_loss": 0.8793970346450806 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7765367031097412, "epoch": 6.22, "learning_rate": 2.0982436367051752e-05, "loss": 0.8243, "step": 7362, "task_loss": 0.5434755086898804 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6026843190193176, "epoch": 6.22, "learning_rate": 2.0977740208509442e-05, "loss": 0.7438, "step": 7363, "task_loss": 0.7945125102996826 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.9115158319473267, "epoch": 6.22, "learning_rate": 2.097304404996713e-05, "loss": 0.7572, "step": 7364, "task_loss": 0.7040691375732422 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5095666646957397, "epoch": 6.23, "learning_rate": 2.0968347891424818e-05, "loss": 0.7362, "step": 7365, "task_loss": 1.0690009593963623 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6209613084793091, "epoch": 6.23, "learning_rate": 2.09636517328825e-05, "loss": 0.7377, "step": 7366, "task_loss": 0.19138358533382416 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.430869460105896, "epoch": 6.23, "learning_rate": 2.095895557434019e-05, "loss": 0.6664, "step": 7367, "task_loss": 1.1845269203186035 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6543041467666626, "epoch": 6.23, "learning_rate": 2.0954259415797877e-05, "loss": 0.848, "step": 7368, "task_loss": 0.4298076033592224 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.0701470375061035, "epoch": 6.23, "learning_rate": 2.0949563257255567e-05, "loss": 0.6883, "step": 7369, "task_loss": 1.2839834690093994 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.0643272399902344, "epoch": 6.23, "learning_rate": 2.0944867098713253e-05, "loss": 0.9253, "step": 7370, "task_loss": 1.9272266626358032 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.8384515643119812, "epoch": 6.23, "learning_rate": 2.0940170940170943e-05, "loss": 0.7032, "step": 7371, "task_loss": 0.5410663485527039 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7600892782211304, "epoch": 6.23, "learning_rate": 2.093547478162863e-05, "loss": 0.6635, "step": 7372, "task_loss": 0.5667957663536072 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.3808114528656006, "epoch": 6.23, "learning_rate": 2.0930778623086316e-05, "loss": 0.904, "step": 7373, "task_loss": 1.5246059894561768 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.9360626935958862, "epoch": 6.23, "learning_rate": 2.0926082464544002e-05, "loss": 0.9408, "step": 7374, "task_loss": 1.1228195428848267 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5500826239585876, "epoch": 6.23, "learning_rate": 2.0921386306001692e-05, "loss": 0.5733, "step": 7375, "task_loss": 0.426235556602478 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6655076146125793, "epoch": 6.23, "learning_rate": 2.0916690147459378e-05, "loss": 0.9387, "step": 7376, "task_loss": 1.268262267112732 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.599971354007721, "epoch": 6.24, "learning_rate": 2.0911993988917068e-05, "loss": 0.8371, "step": 7377, "task_loss": 0.5753202438354492 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.8222448825836182, "epoch": 6.24, "learning_rate": 2.0907297830374754e-05, "loss": 0.7653, "step": 7378, "task_loss": 1.6042546033859253 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7423411011695862, "epoch": 6.24, "learning_rate": 2.090260167183244e-05, "loss": 0.7653, "step": 7379, "task_loss": 1.3295553922653198 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5579724311828613, "epoch": 6.24, "learning_rate": 2.089790551329013e-05, "loss": 0.6765, "step": 7380, "task_loss": 0.6420096158981323 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7439407110214233, "epoch": 6.24, "learning_rate": 2.0893209354747817e-05, "loss": 0.8627, "step": 7381, "task_loss": 1.2603936195373535 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.625250518321991, "epoch": 6.24, "learning_rate": 2.0888513196205506e-05, "loss": 0.9104, "step": 7382, "task_loss": 1.421195149421692 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.085374116897583, "epoch": 6.24, "learning_rate": 2.0883817037663193e-05, "loss": 0.8325, "step": 7383, "task_loss": 1.346087098121643 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5015108585357666, "epoch": 6.24, "learning_rate": 2.0879120879120882e-05, "loss": 0.6582, "step": 7384, "task_loss": 0.5274398922920227 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6149212121963501, "epoch": 6.24, "learning_rate": 2.087442472057857e-05, "loss": 0.7354, "step": 7385, "task_loss": 0.4828100800514221 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.745689332485199, "epoch": 6.24, "learning_rate": 2.0869728562036255e-05, "loss": 0.5751, "step": 7386, "task_loss": 0.22957485914230347 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6897575855255127, "epoch": 6.24, "learning_rate": 2.086503240349394e-05, "loss": 0.6512, "step": 7387, "task_loss": 1.0318617820739746 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.8402003049850464, "epoch": 6.24, "learning_rate": 2.086033624495163e-05, "loss": 0.806, "step": 7388, "task_loss": 0.33925187587738037 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.9079195261001587, "epoch": 6.25, "learning_rate": 2.0855640086409317e-05, "loss": 0.6746, "step": 7389, "task_loss": 0.630473256111145 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7542190551757812, "epoch": 6.25, "learning_rate": 2.0850943927867007e-05, "loss": 0.7077, "step": 7390, "task_loss": 0.4988028109073639 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.562968373298645, "epoch": 6.25, "learning_rate": 2.0846247769324694e-05, "loss": 0.716, "step": 7391, "task_loss": 0.6563072204589844 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.1461164951324463, "epoch": 6.25, "learning_rate": 2.084155161078238e-05, "loss": 0.8346, "step": 7392, "task_loss": 1.3953099250793457 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6645389795303345, "epoch": 6.25, "learning_rate": 2.083685545224007e-05, "loss": 0.5527, "step": 7393, "task_loss": 0.8259853720664978 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4294430613517761, "epoch": 6.25, "learning_rate": 2.0832159293697756e-05, "loss": 0.641, "step": 7394, "task_loss": 0.765816867351532 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.34131571650505066, "epoch": 6.25, "learning_rate": 2.0827463135155446e-05, "loss": 0.6477, "step": 7395, "task_loss": 0.3596201241016388 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.34524106979370117, "epoch": 6.25, "learning_rate": 2.0822766976613132e-05, "loss": 0.6091, "step": 7396, "task_loss": 0.3914586305618286 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.0210773944854736, "epoch": 6.25, "learning_rate": 2.0818070818070822e-05, "loss": 0.7631, "step": 7397, "task_loss": 1.1279278993606567 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5434674620628357, "epoch": 6.25, "learning_rate": 2.0813374659528505e-05, "loss": 0.775, "step": 7398, "task_loss": 0.45286065340042114 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.8154321312904358, "epoch": 6.25, "learning_rate": 2.0808678500986194e-05, "loss": 1.0322, "step": 7399, "task_loss": 2.0448100566864014 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5701747536659241, "epoch": 6.26, "learning_rate": 2.080398234244388e-05, "loss": 0.7295, "step": 7400, "task_loss": 0.568591296672821 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6002664566040039, "epoch": 6.26, "learning_rate": 2.079928618390157e-05, "loss": 0.7291, "step": 7401, "task_loss": 0.5425191521644592 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.9865236878395081, "epoch": 6.26, "learning_rate": 2.0794590025359257e-05, "loss": 0.8912, "step": 7402, "task_loss": 0.9727468490600586 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6687947511672974, "epoch": 6.26, "learning_rate": 2.0789893866816947e-05, "loss": 0.7688, "step": 7403, "task_loss": 0.41799309849739075 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6444734334945679, "epoch": 6.26, "learning_rate": 2.0785197708274633e-05, "loss": 0.6799, "step": 7404, "task_loss": 0.3296579122543335 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.1323192119598389, "epoch": 6.26, "learning_rate": 2.078050154973232e-05, "loss": 0.8226, "step": 7405, "task_loss": 0.7677993774414062 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.3203374147415161, "epoch": 6.26, "learning_rate": 2.0775805391190006e-05, "loss": 0.7936, "step": 7406, "task_loss": 0.3963952362537384 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.8323668241500854, "epoch": 6.26, "learning_rate": 2.0771109232647695e-05, "loss": 0.6554, "step": 7407, "task_loss": 1.4125797748565674 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.85093754529953, "epoch": 6.26, "learning_rate": 2.0766413074105385e-05, "loss": 0.8541, "step": 7408, "task_loss": 1.2057921886444092 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7733677625656128, "epoch": 6.26, "learning_rate": 2.076171691556307e-05, "loss": 0.7217, "step": 7409, "task_loss": 1.016628384590149 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6102703213691711, "epoch": 6.26, "learning_rate": 2.0757020757020758e-05, "loss": 0.8564, "step": 7410, "task_loss": 0.8334298729896545 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.9523823261260986, "epoch": 6.26, "learning_rate": 2.0752324598478444e-05, "loss": 0.7016, "step": 7411, "task_loss": 1.1367703676223755 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5028101801872253, "epoch": 6.27, "learning_rate": 2.0747628439936134e-05, "loss": 0.6691, "step": 7412, "task_loss": 0.7603978514671326 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7649860382080078, "epoch": 6.27, "learning_rate": 2.074293228139382e-05, "loss": 0.8223, "step": 7413, "task_loss": 1.6488134860992432 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.8108687400817871, "epoch": 6.27, "learning_rate": 2.073823612285151e-05, "loss": 0.7728, "step": 7414, "task_loss": 1.4445828199386597 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7168159484863281, "epoch": 6.27, "learning_rate": 2.0733539964309196e-05, "loss": 0.6596, "step": 7415, "task_loss": 0.8484806418418884 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.1400716304779053, "epoch": 6.27, "learning_rate": 2.0728843805766886e-05, "loss": 1.0189, "step": 7416, "task_loss": 2.321479320526123 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5252053737640381, "epoch": 6.27, "learning_rate": 2.072414764722457e-05, "loss": 0.6466, "step": 7417, "task_loss": 0.5142242908477783 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.0043941736221313, "epoch": 6.27, "learning_rate": 2.071945148868226e-05, "loss": 0.76, "step": 7418, "task_loss": 0.7001596093177795 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.8996866941452026, "epoch": 6.27, "learning_rate": 2.0714755330139945e-05, "loss": 0.7252, "step": 7419, "task_loss": 0.3344026803970337 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.9410960674285889, "epoch": 6.27, "learning_rate": 2.0710059171597635e-05, "loss": 0.8433, "step": 7420, "task_loss": 1.3487679958343506 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5089696645736694, "epoch": 6.27, "learning_rate": 2.070536301305532e-05, "loss": 0.6191, "step": 7421, "task_loss": 0.883463442325592 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.8386927247047424, "epoch": 6.27, "learning_rate": 2.070066685451301e-05, "loss": 0.7684, "step": 7422, "task_loss": 0.4113532304763794 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6311105489730835, "epoch": 6.27, "learning_rate": 2.0695970695970697e-05, "loss": 0.8188, "step": 7423, "task_loss": 0.4659413993358612 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5875629186630249, "epoch": 6.28, "learning_rate": 2.0691274537428383e-05, "loss": 0.7516, "step": 7424, "task_loss": 0.41164731979370117 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6897075772285461, "epoch": 6.28, "learning_rate": 2.0686578378886073e-05, "loss": 0.645, "step": 7425, "task_loss": 0.5500578880310059 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7783454656600952, "epoch": 6.28, "learning_rate": 2.068188222034376e-05, "loss": 0.8443, "step": 7426, "task_loss": 0.9478763937950134 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.40555626153945923, "epoch": 6.28, "learning_rate": 2.067718606180145e-05, "loss": 0.6945, "step": 7427, "task_loss": 0.3701245188713074 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6588277816772461, "epoch": 6.28, "learning_rate": 2.0672489903259136e-05, "loss": 0.6197, "step": 7428, "task_loss": 0.48090481758117676 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.46678662300109863, "epoch": 6.28, "learning_rate": 2.0667793744716822e-05, "loss": 0.6836, "step": 7429, "task_loss": 0.31632208824157715 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7910106182098389, "epoch": 6.28, "learning_rate": 2.0663097586174508e-05, "loss": 0.6653, "step": 7430, "task_loss": 0.2654785215854645 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7949224710464478, "epoch": 6.28, "learning_rate": 2.0658401427632198e-05, "loss": 0.7778, "step": 7431, "task_loss": 1.1252044439315796 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6070295572280884, "epoch": 6.28, "learning_rate": 2.0653705269089884e-05, "loss": 0.5675, "step": 7432, "task_loss": 0.6293492317199707 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5609884262084961, "epoch": 6.28, "learning_rate": 2.0649009110547574e-05, "loss": 0.8402, "step": 7433, "task_loss": 0.5521501302719116 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.9000744819641113, "epoch": 6.28, "learning_rate": 2.064431295200526e-05, "loss": 0.9367, "step": 7434, "task_loss": 0.5535411834716797 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6844930648803711, "epoch": 6.28, "learning_rate": 2.063961679346295e-05, "loss": 0.7689, "step": 7435, "task_loss": 0.306185245513916 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.9600838422775269, "epoch": 6.29, "learning_rate": 2.0634920634920636e-05, "loss": 0.8011, "step": 7436, "task_loss": 0.6257910132408142 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.9888410568237305, "epoch": 6.29, "learning_rate": 2.0630224476378323e-05, "loss": 0.6437, "step": 7437, "task_loss": 0.355661004781723 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.1719802618026733, "epoch": 6.29, "learning_rate": 2.062552831783601e-05, "loss": 1.1153, "step": 7438, "task_loss": 1.41117262840271 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6061360836029053, "epoch": 6.29, "learning_rate": 2.06208321592937e-05, "loss": 0.6338, "step": 7439, "task_loss": 0.8253005743026733 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.4684944152832031, "epoch": 6.29, "learning_rate": 2.061613600075139e-05, "loss": 0.84, "step": 7440, "task_loss": 2.695319652557373 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5640551447868347, "epoch": 6.29, "learning_rate": 2.0611439842209075e-05, "loss": 0.8043, "step": 7441, "task_loss": 0.8695645332336426 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6319279670715332, "epoch": 6.29, "learning_rate": 2.060674368366676e-05, "loss": 0.8638, "step": 7442, "task_loss": 0.8624377250671387 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6750088930130005, "epoch": 6.29, "learning_rate": 2.0602047525124448e-05, "loss": 0.6537, "step": 7443, "task_loss": 0.48857519030570984 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5105949640274048, "epoch": 6.29, "learning_rate": 2.0597351366582137e-05, "loss": 0.5997, "step": 7444, "task_loss": 0.366051584482193 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7292328476905823, "epoch": 6.29, "learning_rate": 2.0592655208039824e-05, "loss": 0.9565, "step": 7445, "task_loss": 0.4703395366668701 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6171258687973022, "epoch": 6.29, "learning_rate": 2.0587959049497513e-05, "loss": 0.6895, "step": 7446, "task_loss": 0.8289067149162292 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.8149582147598267, "epoch": 6.29, "learning_rate": 2.05832628909552e-05, "loss": 0.7496, "step": 7447, "task_loss": 0.7453181743621826 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.3368885517120361, "epoch": 6.3, "learning_rate": 2.057856673241289e-05, "loss": 0.9133, "step": 7448, "task_loss": 1.1894291639328003 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7083490490913391, "epoch": 6.3, "learning_rate": 2.0573870573870572e-05, "loss": 0.6579, "step": 7449, "task_loss": 0.9935703277587891 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7421674728393555, "epoch": 6.3, "learning_rate": 2.0569174415328262e-05, "loss": 1.0137, "step": 7450, "task_loss": 1.1948834657669067 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.9806674122810364, "epoch": 6.3, "learning_rate": 2.056447825678595e-05, "loss": 0.8114, "step": 7451, "task_loss": 0.7849366664886475 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7446266412734985, "epoch": 6.3, "learning_rate": 2.0559782098243638e-05, "loss": 0.7426, "step": 7452, "task_loss": 0.24225614964962006 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.9784231185913086, "epoch": 6.3, "learning_rate": 2.0555085939701324e-05, "loss": 0.7389, "step": 7453, "task_loss": 1.2189289331436157 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.657981276512146, "epoch": 6.3, "learning_rate": 2.0550389781159014e-05, "loss": 0.5278, "step": 7454, "task_loss": 0.6073331236839294 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.8415519595146179, "epoch": 6.3, "learning_rate": 2.05456936226167e-05, "loss": 0.8204, "step": 7455, "task_loss": 1.8905978202819824 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.767527163028717, "epoch": 6.3, "learning_rate": 2.0540997464074387e-05, "loss": 0.6762, "step": 7456, "task_loss": 0.5372083783149719 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5876808166503906, "epoch": 6.3, "learning_rate": 2.0536301305532077e-05, "loss": 0.6477, "step": 7457, "task_loss": 1.4048779010772705 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.8592723608016968, "epoch": 6.3, "learning_rate": 2.0531605146989763e-05, "loss": 0.6292, "step": 7458, "task_loss": 0.8719321489334106 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5751591920852661, "epoch": 6.3, "learning_rate": 2.0526908988447453e-05, "loss": 0.7772, "step": 7459, "task_loss": 1.1154478788375854 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7422034740447998, "epoch": 6.31, "learning_rate": 2.052221282990514e-05, "loss": 0.7891, "step": 7460, "task_loss": 0.7303847670555115 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7193036079406738, "epoch": 6.31, "learning_rate": 2.0517516671362825e-05, "loss": 0.558, "step": 7461, "task_loss": 0.32498764991760254 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.1371911764144897, "epoch": 6.31, "learning_rate": 2.0512820512820512e-05, "loss": 0.7194, "step": 7462, "task_loss": 1.1886568069458008 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.574236273765564, "epoch": 6.31, "learning_rate": 2.05081243542782e-05, "loss": 0.8314, "step": 7463, "task_loss": 0.28935617208480835 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.3699676990509033, "epoch": 6.31, "learning_rate": 2.0503428195735888e-05, "loss": 0.6049, "step": 7464, "task_loss": 0.6173701882362366 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5916211605072021, "epoch": 6.31, "learning_rate": 2.0498732037193578e-05, "loss": 0.803, "step": 7465, "task_loss": 0.29219841957092285 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5717437267303467, "epoch": 6.31, "learning_rate": 2.0494035878651264e-05, "loss": 0.6655, "step": 7466, "task_loss": 0.25926673412323 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6863039135932922, "epoch": 6.31, "learning_rate": 2.0489339720108954e-05, "loss": 0.803, "step": 7467, "task_loss": 0.697150468826294 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.0390230417251587, "epoch": 6.31, "learning_rate": 2.0484643561566637e-05, "loss": 0.7169, "step": 7468, "task_loss": 1.4959977865219116 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.8238416910171509, "epoch": 6.31, "learning_rate": 2.0479947403024326e-05, "loss": 1.0381, "step": 7469, "task_loss": 0.9743215441703796 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7851182222366333, "epoch": 6.31, "learning_rate": 2.0475251244482016e-05, "loss": 0.8725, "step": 7470, "task_loss": 0.5519124269485474 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.554581880569458, "epoch": 6.32, "learning_rate": 2.0470555085939702e-05, "loss": 0.8311, "step": 7471, "task_loss": 1.0280426740646362 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7475284337997437, "epoch": 6.32, "learning_rate": 2.0465858927397392e-05, "loss": 0.7034, "step": 7472, "task_loss": 0.7715740203857422 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.48075205087661743, "epoch": 6.32, "learning_rate": 2.046116276885508e-05, "loss": 0.6402, "step": 7473, "task_loss": 1.2195582389831543 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.9182165861129761, "epoch": 6.32, "learning_rate": 2.0456466610312765e-05, "loss": 0.7817, "step": 7474, "task_loss": 0.8701863884925842 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7879924774169922, "epoch": 6.32, "learning_rate": 2.045177045177045e-05, "loss": 0.8162, "step": 7475, "task_loss": 0.6226803064346313 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.9207605123519897, "epoch": 6.32, "learning_rate": 2.044707429322814e-05, "loss": 0.8212, "step": 7476, "task_loss": 1.193554401397705 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.3807130455970764, "epoch": 6.32, "learning_rate": 2.0442378134685827e-05, "loss": 0.5516, "step": 7477, "task_loss": 0.7210559844970703 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.0903037786483765, "epoch": 6.32, "learning_rate": 2.0437681976143517e-05, "loss": 0.7389, "step": 7478, "task_loss": 0.9299229979515076 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.1902445554733276, "epoch": 6.32, "learning_rate": 2.0432985817601203e-05, "loss": 0.7538, "step": 7479, "task_loss": 1.3490186929702759 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6994028687477112, "epoch": 6.32, "learning_rate": 2.0428289659058893e-05, "loss": 0.7985, "step": 7480, "task_loss": 1.6950331926345825 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.8571994304656982, "epoch": 6.32, "learning_rate": 2.0423593500516576e-05, "loss": 0.7862, "step": 7481, "task_loss": 0.42559173703193665 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.857463538646698, "epoch": 6.32, "learning_rate": 2.0418897341974266e-05, "loss": 0.7129, "step": 7482, "task_loss": 0.5072205066680908 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4534081220626831, "epoch": 6.33, "learning_rate": 2.0414201183431952e-05, "loss": 0.5109, "step": 7483, "task_loss": 0.4049938917160034 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7866555452346802, "epoch": 6.33, "learning_rate": 2.040950502488964e-05, "loss": 0.8483, "step": 7484, "task_loss": 1.0585737228393555 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.4317275285720825, "epoch": 6.33, "learning_rate": 2.040480886634733e-05, "loss": 0.7575, "step": 7485, "task_loss": 0.8607675433158875 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4805961549282074, "epoch": 6.33, "learning_rate": 2.0400112707805018e-05, "loss": 0.6726, "step": 7486, "task_loss": 0.9413197040557861 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6208993196487427, "epoch": 6.33, "learning_rate": 2.0395416549262704e-05, "loss": 0.7515, "step": 7487, "task_loss": 0.7183432579040527 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5415467619895935, "epoch": 6.33, "learning_rate": 2.039072039072039e-05, "loss": 0.5815, "step": 7488, "task_loss": 0.30218827724456787 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.26674285531044006, "epoch": 6.33, "learning_rate": 2.038602423217808e-05, "loss": 0.4508, "step": 7489, "task_loss": 0.04002999886870384 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.0022109746932983, "epoch": 6.33, "learning_rate": 2.0381328073635766e-05, "loss": 0.7987, "step": 7490, "task_loss": 0.5550556778907776 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.0105533599853516, "epoch": 6.33, "learning_rate": 2.0376631915093456e-05, "loss": 0.9286, "step": 7491, "task_loss": 0.5031957030296326 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4203837215900421, "epoch": 6.33, "learning_rate": 2.0371935756551143e-05, "loss": 0.6792, "step": 7492, "task_loss": 0.19901612401008606 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5406327247619629, "epoch": 6.33, "learning_rate": 2.036723959800883e-05, "loss": 0.7482, "step": 7493, "task_loss": 0.7134935855865479 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.3313896656036377, "epoch": 6.33, "learning_rate": 2.0362543439466515e-05, "loss": 0.6097, "step": 7494, "task_loss": 0.7809665203094482 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.714168906211853, "epoch": 6.34, "learning_rate": 2.0357847280924205e-05, "loss": 0.8793, "step": 7495, "task_loss": 0.9553563594818115 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5723742842674255, "epoch": 6.34, "learning_rate": 2.035315112238189e-05, "loss": 0.7048, "step": 7496, "task_loss": 0.340175598859787 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.1017974615097046, "epoch": 6.34, "learning_rate": 2.034845496383958e-05, "loss": 0.7591, "step": 7497, "task_loss": 1.0998488664627075 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5159154534339905, "epoch": 6.34, "learning_rate": 2.0343758805297267e-05, "loss": 0.7451, "step": 7498, "task_loss": 0.5804292559623718 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.8280482888221741, "epoch": 6.34, "learning_rate": 2.0339062646754957e-05, "loss": 0.7072, "step": 7499, "task_loss": 0.7390979528427124 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7301377058029175, "epoch": 6.34, "learning_rate": 2.033436648821264e-05, "loss": 0.7402, "step": 7500, "task_loss": 0.823592483997345 }, { "epoch": 6.34, "eval_accuracy": 0.8906138613861386, "eval_loss": 0.48427197337150574, "eval_runtime": 227.9954, "eval_samples_per_second": 110.748, "eval_steps_per_second": 0.868, "step": 7500 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6065465211868286, "epoch": 6.34, "learning_rate": 2.032967032967033e-05, "loss": 0.641, "step": 7501, "task_loss": 1.3591396808624268 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.071468710899353, "epoch": 6.34, "learning_rate": 2.032497417112802e-05, "loss": 0.9143, "step": 7502, "task_loss": 0.5443183183670044 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6417160034179688, "epoch": 6.34, "learning_rate": 2.0320278012585706e-05, "loss": 0.6334, "step": 7503, "task_loss": 0.4025523364543915 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6647366881370544, "epoch": 6.34, "learning_rate": 2.0315581854043396e-05, "loss": 0.6823, "step": 7504, "task_loss": 0.39806410670280457 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5919621586799622, "epoch": 6.34, "learning_rate": 2.0310885695501082e-05, "loss": 0.6561, "step": 7505, "task_loss": 0.36361443996429443 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7387390732765198, "epoch": 6.34, "learning_rate": 2.0306189536958768e-05, "loss": 0.6673, "step": 7506, "task_loss": 1.030948281288147 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.49842870235443115, "epoch": 6.35, "learning_rate": 2.0301493378416455e-05, "loss": 0.7408, "step": 7507, "task_loss": 0.414078950881958 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.8158503770828247, "epoch": 6.35, "learning_rate": 2.0296797219874144e-05, "loss": 0.8865, "step": 7508, "task_loss": 0.15955457091331482 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.42736566066741943, "epoch": 6.35, "learning_rate": 2.029210106133183e-05, "loss": 0.7908, "step": 7509, "task_loss": 1.1600167751312256 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.8904193639755249, "epoch": 6.35, "learning_rate": 2.028740490278952e-05, "loss": 0.7694, "step": 7510, "task_loss": 0.6995530724525452 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.40880489349365234, "epoch": 6.35, "learning_rate": 2.0282708744247207e-05, "loss": 0.6613, "step": 7511, "task_loss": 0.48771902918815613 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.8147513270378113, "epoch": 6.35, "learning_rate": 2.0278012585704893e-05, "loss": 0.629, "step": 7512, "task_loss": 0.6779705286026001 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5522522926330566, "epoch": 6.35, "learning_rate": 2.027331642716258e-05, "loss": 0.7524, "step": 7513, "task_loss": 0.6143159866333008 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5842427611351013, "epoch": 6.35, "learning_rate": 2.026862026862027e-05, "loss": 0.6785, "step": 7514, "task_loss": 0.37798845767974854 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5957674980163574, "epoch": 6.35, "learning_rate": 2.0263924110077955e-05, "loss": 0.6384, "step": 7515, "task_loss": 1.0146468877792358 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.8060710430145264, "epoch": 6.35, "learning_rate": 2.0259227951535645e-05, "loss": 0.7518, "step": 7516, "task_loss": 0.7413665652275085 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5653182864189148, "epoch": 6.35, "learning_rate": 2.0254531792993335e-05, "loss": 0.6608, "step": 7517, "task_loss": 0.18142037093639374 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.567977786064148, "epoch": 6.35, "learning_rate": 2.024983563445102e-05, "loss": 0.6526, "step": 7518, "task_loss": 0.7732404470443726 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.1748316287994385, "epoch": 6.36, "learning_rate": 2.0245139475908708e-05, "loss": 0.8862, "step": 7519, "task_loss": 0.6319252848625183 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.47125524282455444, "epoch": 6.36, "learning_rate": 2.0240443317366394e-05, "loss": 0.868, "step": 7520, "task_loss": 0.39438170194625854 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6819478869438171, "epoch": 6.36, "learning_rate": 2.0235747158824084e-05, "loss": 0.7978, "step": 7521, "task_loss": 1.2663321495056152 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.9620743989944458, "epoch": 6.36, "learning_rate": 2.023105100028177e-05, "loss": 1.0633, "step": 7522, "task_loss": 2.177733898162842 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.101214051246643, "epoch": 6.36, "learning_rate": 2.022635484173946e-05, "loss": 0.7539, "step": 7523, "task_loss": 0.4256002902984619 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7569538950920105, "epoch": 6.36, "learning_rate": 2.0221658683197146e-05, "loss": 0.8757, "step": 7524, "task_loss": 1.3909714221954346 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4687160551548004, "epoch": 6.36, "learning_rate": 2.0216962524654832e-05, "loss": 0.7759, "step": 7525, "task_loss": 0.6716315150260925 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5712404251098633, "epoch": 6.36, "learning_rate": 2.021226636611252e-05, "loss": 0.7075, "step": 7526, "task_loss": 0.9849943518638611 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7025734186172485, "epoch": 6.36, "learning_rate": 2.020757020757021e-05, "loss": 0.6634, "step": 7527, "task_loss": 0.1741601675748825 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.8958563804626465, "epoch": 6.36, "learning_rate": 2.0202874049027895e-05, "loss": 0.6974, "step": 7528, "task_loss": 0.6891791820526123 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.3044697046279907, "epoch": 6.36, "learning_rate": 2.0198177890485585e-05, "loss": 0.8575, "step": 7529, "task_loss": 1.4352550506591797 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.0483877658843994, "epoch": 6.36, "learning_rate": 2.019348173194327e-05, "loss": 0.8674, "step": 7530, "task_loss": 0.4578039050102234 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5349478125572205, "epoch": 6.37, "learning_rate": 2.018878557340096e-05, "loss": 0.5485, "step": 7531, "task_loss": 0.18011540174484253 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.40835249423980713, "epoch": 6.37, "learning_rate": 2.0184089414858647e-05, "loss": 0.7038, "step": 7532, "task_loss": 0.5932508111000061 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.719973087310791, "epoch": 6.37, "learning_rate": 2.0179393256316333e-05, "loss": 0.7931, "step": 7533, "task_loss": 0.6570042967796326 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7316364645957947, "epoch": 6.37, "learning_rate": 2.0174697097774023e-05, "loss": 0.5838, "step": 7534, "task_loss": 0.3978091776371002 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.8342056274414062, "epoch": 6.37, "learning_rate": 2.017000093923171e-05, "loss": 0.8349, "step": 7535, "task_loss": 1.0953102111816406 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.0915167331695557, "epoch": 6.37, "learning_rate": 2.01653047806894e-05, "loss": 0.9528, "step": 7536, "task_loss": 0.5630344152450562 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5937149524688721, "epoch": 6.37, "learning_rate": 2.0160608622147085e-05, "loss": 0.7136, "step": 7537, "task_loss": 0.1944558173418045 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.9827777147293091, "epoch": 6.37, "learning_rate": 2.0155912463604772e-05, "loss": 0.8215, "step": 7538, "task_loss": 1.0903899669647217 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.0574376583099365, "epoch": 6.37, "learning_rate": 2.0151216305062458e-05, "loss": 0.9948, "step": 7539, "task_loss": 1.515382170677185 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7983678579330444, "epoch": 6.37, "learning_rate": 2.0146520146520148e-05, "loss": 0.9417, "step": 7540, "task_loss": 0.48026594519615173 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4345841407775879, "epoch": 6.37, "learning_rate": 2.0141823987977834e-05, "loss": 0.5429, "step": 7541, "task_loss": 0.822762668132782 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4187994599342346, "epoch": 6.38, "learning_rate": 2.0137127829435524e-05, "loss": 0.5927, "step": 7542, "task_loss": 0.12753166258335114 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6323968172073364, "epoch": 6.38, "learning_rate": 2.013243167089321e-05, "loss": 0.7228, "step": 7543, "task_loss": 0.6080396175384521 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.8404750227928162, "epoch": 6.38, "learning_rate": 2.0127735512350897e-05, "loss": 0.7509, "step": 7544, "task_loss": 0.5601015686988831 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5631935596466064, "epoch": 6.38, "learning_rate": 2.0123039353808583e-05, "loss": 0.939, "step": 7545, "task_loss": 0.7316991090774536 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6040771007537842, "epoch": 6.38, "learning_rate": 2.0118343195266273e-05, "loss": 0.6489, "step": 7546, "task_loss": 0.3803209066390991 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4479072093963623, "epoch": 6.38, "learning_rate": 2.0113647036723962e-05, "loss": 0.6534, "step": 7547, "task_loss": 1.0161069631576538 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7445828318595886, "epoch": 6.38, "learning_rate": 2.010895087818165e-05, "loss": 0.6282, "step": 7548, "task_loss": 0.8260242938995361 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6225458383560181, "epoch": 6.38, "learning_rate": 2.010425471963934e-05, "loss": 0.7293, "step": 7549, "task_loss": 0.7846152186393738 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7853468060493469, "epoch": 6.38, "learning_rate": 2.0099558561097025e-05, "loss": 0.5, "step": 7550, "task_loss": 0.36259153485298157 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.2956175804138184, "epoch": 6.38, "learning_rate": 2.009486240255471e-05, "loss": 0.9392, "step": 7551, "task_loss": 1.1912652254104614 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5296987295150757, "epoch": 6.38, "learning_rate": 2.0090166244012397e-05, "loss": 0.5891, "step": 7552, "task_loss": 0.8086316585540771 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.41401636600494385, "epoch": 6.38, "learning_rate": 2.0085470085470087e-05, "loss": 0.6248, "step": 7553, "task_loss": 0.395498126745224 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.548647403717041, "epoch": 6.39, "learning_rate": 2.0080773926927774e-05, "loss": 0.7016, "step": 7554, "task_loss": 1.0272201299667358 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.46696850657463074, "epoch": 6.39, "learning_rate": 2.0076077768385463e-05, "loss": 0.7131, "step": 7555, "task_loss": 0.6116682291030884 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7245603799819946, "epoch": 6.39, "learning_rate": 2.007138160984315e-05, "loss": 0.7812, "step": 7556, "task_loss": 0.7864206433296204 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6072379350662231, "epoch": 6.39, "learning_rate": 2.0066685451300836e-05, "loss": 0.831, "step": 7557, "task_loss": 0.7088528871536255 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7353689074516296, "epoch": 6.39, "learning_rate": 2.0061989292758522e-05, "loss": 0.8521, "step": 7558, "task_loss": 0.9674023389816284 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4925864636898041, "epoch": 6.39, "learning_rate": 2.0057293134216212e-05, "loss": 0.7468, "step": 7559, "task_loss": 0.5418189764022827 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.3976247310638428, "epoch": 6.39, "learning_rate": 2.00525969756739e-05, "loss": 0.8953, "step": 7560, "task_loss": 1.0477639436721802 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.0549635887145996, "epoch": 6.39, "learning_rate": 2.0047900817131588e-05, "loss": 0.8228, "step": 7561, "task_loss": 1.692091464996338 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6518211960792542, "epoch": 6.39, "learning_rate": 2.0043204658589278e-05, "loss": 0.5542, "step": 7562, "task_loss": 0.5194807648658752 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7623116374015808, "epoch": 6.39, "learning_rate": 2.003850850004696e-05, "loss": 0.9223, "step": 7563, "task_loss": 0.39547768235206604 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.9316878318786621, "epoch": 6.39, "learning_rate": 2.003381234150465e-05, "loss": 0.7812, "step": 7564, "task_loss": 1.363194227218628 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6729598641395569, "epoch": 6.39, "learning_rate": 2.0029116182962337e-05, "loss": 0.7754, "step": 7565, "task_loss": 0.9899405837059021 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.9512329697608948, "epoch": 6.4, "learning_rate": 2.0024420024420027e-05, "loss": 0.8025, "step": 7566, "task_loss": 1.0848573446273804 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.9794129133224487, "epoch": 6.4, "learning_rate": 2.0019723865877713e-05, "loss": 0.799, "step": 7567, "task_loss": 0.671390950679779 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.792204737663269, "epoch": 6.4, "learning_rate": 2.0015027707335403e-05, "loss": 0.7902, "step": 7568, "task_loss": 0.6581438779830933 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.9649010896682739, "epoch": 6.4, "learning_rate": 2.001033154879309e-05, "loss": 1.3135, "step": 7569, "task_loss": 0.9912058711051941 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5095652937889099, "epoch": 6.4, "learning_rate": 2.0005635390250775e-05, "loss": 0.7887, "step": 7570, "task_loss": 0.6384296417236328 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7252198457717896, "epoch": 6.4, "learning_rate": 2.000093923170846e-05, "loss": 0.8093, "step": 7571, "task_loss": 0.7928059101104736 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.8304101228713989, "epoch": 6.4, "learning_rate": 1.999624307316615e-05, "loss": 0.8171, "step": 7572, "task_loss": 0.8555334806442261 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.39439857006073, "epoch": 6.4, "learning_rate": 1.9991546914623838e-05, "loss": 0.6802, "step": 7573, "task_loss": 0.6421467065811157 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.9881793856620789, "epoch": 6.4, "learning_rate": 1.9986850756081527e-05, "loss": 0.8473, "step": 7574, "task_loss": 0.7989943027496338 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.1185126304626465, "epoch": 6.4, "learning_rate": 1.9982154597539214e-05, "loss": 0.8876, "step": 7575, "task_loss": 1.5409964323043823 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4071910083293915, "epoch": 6.4, "learning_rate": 1.99774584389969e-05, "loss": 0.7298, "step": 7576, "task_loss": 0.3754138648509979 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.45496195554733276, "epoch": 6.4, "learning_rate": 1.9972762280454586e-05, "loss": 0.8587, "step": 7577, "task_loss": 0.5419254302978516 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4706938862800598, "epoch": 6.41, "learning_rate": 1.9968066121912276e-05, "loss": 0.6613, "step": 7578, "task_loss": 0.9052693843841553 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6920653581619263, "epoch": 6.41, "learning_rate": 1.9963369963369966e-05, "loss": 0.6871, "step": 7579, "task_loss": 0.6808609962463379 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.0918956995010376, "epoch": 6.41, "learning_rate": 1.9958673804827652e-05, "loss": 0.7848, "step": 7580, "task_loss": 1.9416871070861816 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.9899260401725769, "epoch": 6.41, "learning_rate": 1.9953977646285342e-05, "loss": 0.7407, "step": 7581, "task_loss": 0.7347291707992554 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.35924428701400757, "epoch": 6.41, "learning_rate": 1.994928148774303e-05, "loss": 0.5293, "step": 7582, "task_loss": 0.49180716276168823 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.9731068015098572, "epoch": 6.41, "learning_rate": 1.9944585329200715e-05, "loss": 0.6994, "step": 7583, "task_loss": 0.7984768748283386 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.800491213798523, "epoch": 6.41, "learning_rate": 1.99398891706584e-05, "loss": 0.778, "step": 7584, "task_loss": 0.5808577537536621 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.2880233526229858, "epoch": 6.41, "learning_rate": 1.993519301211609e-05, "loss": 0.7864, "step": 7585, "task_loss": 1.692447304725647 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6164824962615967, "epoch": 6.41, "learning_rate": 1.9930496853573777e-05, "loss": 0.7138, "step": 7586, "task_loss": 0.7234996557235718 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6692668199539185, "epoch": 6.41, "learning_rate": 1.9925800695031467e-05, "loss": 0.6475, "step": 7587, "task_loss": 1.4803858995437622 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6115835905075073, "epoch": 6.41, "learning_rate": 1.9921104536489153e-05, "loss": 0.65, "step": 7588, "task_loss": 1.0782718658447266 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.9879306554794312, "epoch": 6.41, "learning_rate": 1.991640837794684e-05, "loss": 0.8429, "step": 7589, "task_loss": 1.0999752283096313 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.014695167541504, "epoch": 6.42, "learning_rate": 1.9911712219404526e-05, "loss": 0.733, "step": 7590, "task_loss": 1.1705737113952637 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6471611261367798, "epoch": 6.42, "learning_rate": 1.9907016060862216e-05, "loss": 0.7367, "step": 7591, "task_loss": 1.296729564666748 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.812690019607544, "epoch": 6.42, "learning_rate": 1.9902319902319902e-05, "loss": 0.9234, "step": 7592, "task_loss": 0.6287636160850525 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5414273738861084, "epoch": 6.42, "learning_rate": 1.989762374377759e-05, "loss": 0.6624, "step": 7593, "task_loss": 0.4580233097076416 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.9044414758682251, "epoch": 6.42, "learning_rate": 1.989292758523528e-05, "loss": 0.8567, "step": 7594, "task_loss": 1.8658394813537598 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.37194591760635376, "epoch": 6.42, "learning_rate": 1.9888231426692964e-05, "loss": 0.5345, "step": 7595, "task_loss": 1.337389588356018 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5872178673744202, "epoch": 6.42, "learning_rate": 1.9883535268150654e-05, "loss": 0.8213, "step": 7596, "task_loss": 0.6628521680831909 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.927291989326477, "epoch": 6.42, "learning_rate": 1.987883910960834e-05, "loss": 0.9239, "step": 7597, "task_loss": 1.6592340469360352 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.3163343071937561, "epoch": 6.42, "learning_rate": 1.987414295106603e-05, "loss": 0.6493, "step": 7598, "task_loss": 0.39621442556381226 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7799478769302368, "epoch": 6.42, "learning_rate": 1.9869446792523716e-05, "loss": 0.6848, "step": 7599, "task_loss": 1.5239591598510742 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.0257673263549805, "epoch": 6.42, "learning_rate": 1.9864750633981406e-05, "loss": 0.8307, "step": 7600, "task_loss": 1.2594103813171387 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.0639734268188477, "epoch": 6.42, "learning_rate": 1.9860054475439093e-05, "loss": 0.5799, "step": 7601, "task_loss": 2.3172764778137207 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6554515361785889, "epoch": 6.43, "learning_rate": 1.985535831689678e-05, "loss": 0.5763, "step": 7602, "task_loss": 0.5721491575241089 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7856898307800293, "epoch": 6.43, "learning_rate": 1.9850662158354465e-05, "loss": 0.8173, "step": 7603, "task_loss": 0.38338908553123474 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5960242748260498, "epoch": 6.43, "learning_rate": 1.9845965999812155e-05, "loss": 0.7229, "step": 7604, "task_loss": 0.5853872299194336 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.019831657409668, "epoch": 6.43, "learning_rate": 1.984126984126984e-05, "loss": 0.8199, "step": 7605, "task_loss": 0.946938693523407 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5050774216651917, "epoch": 6.43, "learning_rate": 1.983657368272753e-05, "loss": 0.5934, "step": 7606, "task_loss": 0.7784441113471985 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.8492951393127441, "epoch": 6.43, "learning_rate": 1.9831877524185217e-05, "loss": 0.6883, "step": 7607, "task_loss": 1.4425342082977295 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.9901495575904846, "epoch": 6.43, "learning_rate": 1.9827181365642904e-05, "loss": 0.8387, "step": 7608, "task_loss": 0.914306104183197 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.9025952219963074, "epoch": 6.43, "learning_rate": 1.9822485207100593e-05, "loss": 0.6986, "step": 7609, "task_loss": 1.5781766176223755 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.3488155603408813, "epoch": 6.43, "learning_rate": 1.981778904855828e-05, "loss": 0.7673, "step": 7610, "task_loss": 0.9815776348114014 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.8924047946929932, "epoch": 6.43, "learning_rate": 1.981309289001597e-05, "loss": 0.7199, "step": 7611, "task_loss": 1.5721698999404907 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5159244537353516, "epoch": 6.43, "learning_rate": 1.9808396731473656e-05, "loss": 0.705, "step": 7612, "task_loss": 0.8294304013252258 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.601977527141571, "epoch": 6.44, "learning_rate": 1.9803700572931346e-05, "loss": 0.6346, "step": 7613, "task_loss": 0.450419545173645 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.9583681225776672, "epoch": 6.44, "learning_rate": 1.9799004414389032e-05, "loss": 0.8706, "step": 7614, "task_loss": 0.9907078742980957 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.0647671222686768, "epoch": 6.44, "learning_rate": 1.9794308255846718e-05, "loss": 0.9387, "step": 7615, "task_loss": 0.9203618764877319 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7498989701271057, "epoch": 6.44, "learning_rate": 1.9789612097304405e-05, "loss": 0.8229, "step": 7616, "task_loss": 1.3346717357635498 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.46873050928115845, "epoch": 6.44, "learning_rate": 1.9784915938762094e-05, "loss": 0.6441, "step": 7617, "task_loss": 0.6018404960632324 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7259403467178345, "epoch": 6.44, "learning_rate": 1.978021978021978e-05, "loss": 0.762, "step": 7618, "task_loss": 0.9145200252532959 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6513059139251709, "epoch": 6.44, "learning_rate": 1.977552362167747e-05, "loss": 0.742, "step": 7619, "task_loss": 1.0814377069473267 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7161652445793152, "epoch": 6.44, "learning_rate": 1.9770827463135157e-05, "loss": 0.783, "step": 7620, "task_loss": 1.3463190793991089 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.1232523918151855, "epoch": 6.44, "learning_rate": 1.9766131304592843e-05, "loss": 0.6817, "step": 7621, "task_loss": 0.5128882527351379 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6046174168586731, "epoch": 6.44, "learning_rate": 1.976143514605053e-05, "loss": 0.7081, "step": 7622, "task_loss": 1.371252417564392 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4807848036289215, "epoch": 6.44, "learning_rate": 1.975673898750822e-05, "loss": 0.5205, "step": 7623, "task_loss": 1.3594398498535156 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7682082653045654, "epoch": 6.44, "learning_rate": 1.975204282896591e-05, "loss": 0.5946, "step": 7624, "task_loss": 0.5053848028182983 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6603572368621826, "epoch": 6.45, "learning_rate": 1.9747346670423595e-05, "loss": 0.9033, "step": 7625, "task_loss": 0.6079197525978088 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7668554186820984, "epoch": 6.45, "learning_rate": 1.9742650511881285e-05, "loss": 0.7362, "step": 7626, "task_loss": 0.8268011808395386 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7177993655204773, "epoch": 6.45, "learning_rate": 1.9737954353338968e-05, "loss": 0.7783, "step": 7627, "task_loss": 1.0116909742355347 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.9775610566139221, "epoch": 6.45, "learning_rate": 1.9733258194796658e-05, "loss": 0.8983, "step": 7628, "task_loss": 1.5301963090896606 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.607850193977356, "epoch": 6.45, "learning_rate": 1.9728562036254344e-05, "loss": 0.5596, "step": 7629, "task_loss": 0.6180531978607178 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.704442024230957, "epoch": 6.45, "learning_rate": 1.9723865877712034e-05, "loss": 0.8731, "step": 7630, "task_loss": 0.4971233010292053 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.3705311417579651, "epoch": 6.45, "learning_rate": 1.971916971916972e-05, "loss": 0.5788, "step": 7631, "task_loss": 0.24547626078128815 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5417773723602295, "epoch": 6.45, "learning_rate": 1.971447356062741e-05, "loss": 0.7832, "step": 7632, "task_loss": 0.8369224071502686 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.8113960027694702, "epoch": 6.45, "learning_rate": 1.9709777402085096e-05, "loss": 0.6169, "step": 7633, "task_loss": 0.3220120668411255 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7441126704216003, "epoch": 6.45, "learning_rate": 1.9705081243542782e-05, "loss": 0.6375, "step": 7634, "task_loss": 1.0451297760009766 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.8870585560798645, "epoch": 6.45, "learning_rate": 1.970038508500047e-05, "loss": 0.9103, "step": 7635, "task_loss": 0.7072421312332153 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7330875992774963, "epoch": 6.45, "learning_rate": 1.969568892645816e-05, "loss": 0.6812, "step": 7636, "task_loss": 0.7836346626281738 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4489344656467438, "epoch": 6.46, "learning_rate": 1.9690992767915845e-05, "loss": 0.609, "step": 7637, "task_loss": 0.028671870008111 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6549336314201355, "epoch": 6.46, "learning_rate": 1.9686296609373535e-05, "loss": 0.7351, "step": 7638, "task_loss": 2.1135644912719727 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.1013147830963135, "epoch": 6.46, "learning_rate": 1.968160045083122e-05, "loss": 1.0509, "step": 7639, "task_loss": 0.34713420271873474 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.41844791173934937, "epoch": 6.46, "learning_rate": 1.9676904292288907e-05, "loss": 0.6898, "step": 7640, "task_loss": 1.0466957092285156 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.9395298957824707, "epoch": 6.46, "learning_rate": 1.9672208133746597e-05, "loss": 0.8717, "step": 7641, "task_loss": 1.1734822988510132 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.1004016399383545, "epoch": 6.46, "learning_rate": 1.9667511975204283e-05, "loss": 0.7955, "step": 7642, "task_loss": 0.5363582372665405 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.0975589752197266, "epoch": 6.46, "learning_rate": 1.9662815816661973e-05, "loss": 0.9302, "step": 7643, "task_loss": 0.8820284605026245 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4563593566417694, "epoch": 6.46, "learning_rate": 1.965811965811966e-05, "loss": 0.5336, "step": 7644, "task_loss": 0.36678874492645264 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6835901737213135, "epoch": 6.46, "learning_rate": 1.965342349957735e-05, "loss": 0.6555, "step": 7645, "task_loss": 0.8172290325164795 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6804521083831787, "epoch": 6.46, "learning_rate": 1.9648727341035032e-05, "loss": 0.833, "step": 7646, "task_loss": 0.6604913473129272 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6461050510406494, "epoch": 6.46, "learning_rate": 1.9644031182492722e-05, "loss": 0.5992, "step": 7647, "task_loss": 0.25746679306030273 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.3549998700618744, "epoch": 6.46, "learning_rate": 1.9639335023950408e-05, "loss": 0.4828, "step": 7648, "task_loss": 0.084767185151577 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.8378896713256836, "epoch": 6.47, "learning_rate": 1.9634638865408098e-05, "loss": 0.6356, "step": 7649, "task_loss": 0.36759141087532043 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5671383142471313, "epoch": 6.47, "learning_rate": 1.9629942706865784e-05, "loss": 0.6062, "step": 7650, "task_loss": 0.531984269618988 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.536716639995575, "epoch": 6.47, "learning_rate": 1.9625246548323474e-05, "loss": 0.6329, "step": 7651, "task_loss": 0.46266689896583557 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.9520303010940552, "epoch": 6.47, "learning_rate": 1.962055038978116e-05, "loss": 0.898, "step": 7652, "task_loss": 1.0796414613723755 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.38990938663482666, "epoch": 6.47, "learning_rate": 1.9615854231238847e-05, "loss": 0.6717, "step": 7653, "task_loss": 0.1859576255083084 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6438318490982056, "epoch": 6.47, "learning_rate": 1.9611158072696533e-05, "loss": 0.8422, "step": 7654, "task_loss": 0.39792540669441223 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.946978747844696, "epoch": 6.47, "learning_rate": 1.9606461914154223e-05, "loss": 0.7002, "step": 7655, "task_loss": 1.1868681907653809 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7481412887573242, "epoch": 6.47, "learning_rate": 1.9601765755611912e-05, "loss": 0.5171, "step": 7656, "task_loss": 1.3572677373886108 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5795758962631226, "epoch": 6.47, "learning_rate": 1.95970695970696e-05, "loss": 0.7549, "step": 7657, "task_loss": 0.6179518699645996 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.1502937078475952, "epoch": 6.47, "learning_rate": 1.9592373438527285e-05, "loss": 0.8883, "step": 7658, "task_loss": 1.883871078491211 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6170938611030579, "epoch": 6.47, "learning_rate": 1.958767727998497e-05, "loss": 0.5802, "step": 7659, "task_loss": 0.8290494084358215 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.43443092703819275, "epoch": 6.47, "learning_rate": 1.958298112144266e-05, "loss": 0.7217, "step": 7660, "task_loss": 0.45499610900878906 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4020749032497406, "epoch": 6.48, "learning_rate": 1.9578284962900347e-05, "loss": 0.5663, "step": 7661, "task_loss": 0.6695215106010437 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5470622181892395, "epoch": 6.48, "learning_rate": 1.9573588804358037e-05, "loss": 0.6557, "step": 7662, "task_loss": 0.6010783910751343 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6080908179283142, "epoch": 6.48, "learning_rate": 1.9568892645815723e-05, "loss": 0.5875, "step": 7663, "task_loss": 0.3884897232055664 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.0029715299606323, "epoch": 6.48, "learning_rate": 1.9564196487273413e-05, "loss": 0.9147, "step": 7664, "task_loss": 1.9032182693481445 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.9333693981170654, "epoch": 6.48, "learning_rate": 1.95595003287311e-05, "loss": 0.8198, "step": 7665, "task_loss": 1.3790310621261597 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.0713704824447632, "epoch": 6.48, "learning_rate": 1.9554804170188786e-05, "loss": 0.7448, "step": 7666, "task_loss": 0.8557037711143494 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5958260297775269, "epoch": 6.48, "learning_rate": 1.9550108011646472e-05, "loss": 0.5718, "step": 7667, "task_loss": 0.33300381898880005 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5017598271369934, "epoch": 6.48, "learning_rate": 1.9545411853104162e-05, "loss": 0.5906, "step": 7668, "task_loss": 0.6519893407821655 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.36946403980255127, "epoch": 6.48, "learning_rate": 1.954071569456185e-05, "loss": 0.5557, "step": 7669, "task_loss": 0.36470916867256165 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6212729215621948, "epoch": 6.48, "learning_rate": 1.9536019536019538e-05, "loss": 0.7272, "step": 7670, "task_loss": 0.8073986172676086 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.8842863440513611, "epoch": 6.48, "learning_rate": 1.9531323377477224e-05, "loss": 1.0034, "step": 7671, "task_loss": 0.7476460933685303 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5323085784912109, "epoch": 6.48, "learning_rate": 1.952662721893491e-05, "loss": 0.5975, "step": 7672, "task_loss": 1.1198843717575073 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5339580774307251, "epoch": 6.49, "learning_rate": 1.95219310603926e-05, "loss": 0.5262, "step": 7673, "task_loss": 0.4230917990207672 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.752091109752655, "epoch": 6.49, "learning_rate": 1.9517234901850287e-05, "loss": 0.7387, "step": 7674, "task_loss": 0.4099613428115845 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.8076943159103394, "epoch": 6.49, "learning_rate": 1.9512538743307977e-05, "loss": 0.7939, "step": 7675, "task_loss": 0.9151133894920349 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6077210307121277, "epoch": 6.49, "learning_rate": 1.9507842584765663e-05, "loss": 0.6888, "step": 7676, "task_loss": 0.46267110109329224 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.9182029962539673, "epoch": 6.49, "learning_rate": 1.9503146426223353e-05, "loss": 0.7513, "step": 7677, "task_loss": 0.6592177748680115 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.9368127584457397, "epoch": 6.49, "learning_rate": 1.9498450267681036e-05, "loss": 0.8257, "step": 7678, "task_loss": 0.7744563221931458 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.040683388710022, "epoch": 6.49, "learning_rate": 1.9493754109138725e-05, "loss": 0.9294, "step": 7679, "task_loss": 1.0319582223892212 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7262957096099854, "epoch": 6.49, "learning_rate": 1.948905795059641e-05, "loss": 1.02, "step": 7680, "task_loss": 1.0110260248184204 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.3978745937347412, "epoch": 6.49, "learning_rate": 1.94843617920541e-05, "loss": 0.631, "step": 7681, "task_loss": 0.45489755272865295 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6194429993629456, "epoch": 6.49, "learning_rate": 1.9479665633511788e-05, "loss": 0.7523, "step": 7682, "task_loss": 0.9386467933654785 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6541781425476074, "epoch": 6.49, "learning_rate": 1.9474969474969477e-05, "loss": 0.7375, "step": 7683, "task_loss": 0.14900720119476318 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7662619948387146, "epoch": 6.5, "learning_rate": 1.9470273316427164e-05, "loss": 0.7945, "step": 7684, "task_loss": 1.5367803573608398 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.974721372127533, "epoch": 6.5, "learning_rate": 1.946557715788485e-05, "loss": 0.7873, "step": 7685, "task_loss": 1.3970459699630737 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.744037389755249, "epoch": 6.5, "learning_rate": 1.946088099934254e-05, "loss": 0.7389, "step": 7686, "task_loss": 0.7892038822174072 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5587062835693359, "epoch": 6.5, "learning_rate": 1.9456184840800226e-05, "loss": 0.6255, "step": 7687, "task_loss": 0.8366269469261169 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7333464622497559, "epoch": 6.5, "learning_rate": 1.9451488682257916e-05, "loss": 0.657, "step": 7688, "task_loss": 0.7022040486335754 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5175093412399292, "epoch": 6.5, "learning_rate": 1.9446792523715602e-05, "loss": 0.5341, "step": 7689, "task_loss": 0.993614673614502 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.4450823068618774, "epoch": 6.5, "learning_rate": 1.944209636517329e-05, "loss": 0.865, "step": 7690, "task_loss": 1.473414421081543 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.2266720533370972, "epoch": 6.5, "learning_rate": 1.9437400206630975e-05, "loss": 0.797, "step": 7691, "task_loss": 1.6193474531173706 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.39499473571777344, "epoch": 6.5, "learning_rate": 1.9432704048088665e-05, "loss": 0.7253, "step": 7692, "task_loss": 0.14277000725269318 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.8442603349685669, "epoch": 6.5, "learning_rate": 1.942800788954635e-05, "loss": 0.7514, "step": 7693, "task_loss": 0.5949330925941467 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6672244071960449, "epoch": 6.5, "learning_rate": 1.942331173100404e-05, "loss": 0.733, "step": 7694, "task_loss": 0.6044737100601196 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.3811165988445282, "epoch": 6.5, "learning_rate": 1.9418615572461727e-05, "loss": 0.6609, "step": 7695, "task_loss": 0.19134503602981567 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.9168832898139954, "epoch": 6.51, "learning_rate": 1.9413919413919417e-05, "loss": 0.8501, "step": 7696, "task_loss": 1.3544936180114746 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.8563758134841919, "epoch": 6.51, "learning_rate": 1.9409223255377103e-05, "loss": 0.7715, "step": 7697, "task_loss": 1.2832733392715454 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.8305966258049011, "epoch": 6.51, "learning_rate": 1.940452709683479e-05, "loss": 0.6625, "step": 7698, "task_loss": 1.1045334339141846 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.3913740813732147, "epoch": 6.51, "learning_rate": 1.9399830938292476e-05, "loss": 0.6396, "step": 7699, "task_loss": 0.26936590671539307 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5535858273506165, "epoch": 6.51, "learning_rate": 1.9395134779750165e-05, "loss": 0.6665, "step": 7700, "task_loss": 0.49178457260131836 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7849540114402771, "epoch": 6.51, "learning_rate": 1.9390438621207855e-05, "loss": 0.9252, "step": 7701, "task_loss": 1.8992985486984253 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.8047544956207275, "epoch": 6.51, "learning_rate": 1.938574246266554e-05, "loss": 0.7258, "step": 7702, "task_loss": 0.8615409135818481 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5726311206817627, "epoch": 6.51, "learning_rate": 1.9381046304123228e-05, "loss": 0.804, "step": 7703, "task_loss": 0.5708454251289368 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7503408193588257, "epoch": 6.51, "learning_rate": 1.9376350145580914e-05, "loss": 0.8292, "step": 7704, "task_loss": 0.9339148998260498 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7210506200790405, "epoch": 6.51, "learning_rate": 1.9371653987038604e-05, "loss": 0.6455, "step": 7705, "task_loss": 0.7204990983009338 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.8715714812278748, "epoch": 6.51, "learning_rate": 1.936695782849629e-05, "loss": 0.6506, "step": 7706, "task_loss": 0.176946759223938 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5305806398391724, "epoch": 6.51, "learning_rate": 1.936226166995398e-05, "loss": 0.5084, "step": 7707, "task_loss": 0.6959893107414246 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.8788647055625916, "epoch": 6.52, "learning_rate": 1.9357565511411666e-05, "loss": 0.819, "step": 7708, "task_loss": 1.4055845737457275 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.0391018390655518, "epoch": 6.52, "learning_rate": 1.9352869352869356e-05, "loss": 0.7539, "step": 7709, "task_loss": 0.9696717262268066 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7876948118209839, "epoch": 6.52, "learning_rate": 1.934817319432704e-05, "loss": 0.7556, "step": 7710, "task_loss": 1.4222209453582764 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.0485601425170898, "epoch": 6.52, "learning_rate": 1.934347703578473e-05, "loss": 1.0434, "step": 7711, "task_loss": 0.38439807295799255 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6688847541809082, "epoch": 6.52, "learning_rate": 1.9338780877242415e-05, "loss": 0.8281, "step": 7712, "task_loss": 0.9486384391784668 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5378887057304382, "epoch": 6.52, "learning_rate": 1.9334084718700105e-05, "loss": 0.6378, "step": 7713, "task_loss": 0.5738407969474792 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.0102933645248413, "epoch": 6.52, "learning_rate": 1.932938856015779e-05, "loss": 0.7185, "step": 7714, "task_loss": 0.6165779232978821 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.8767176866531372, "epoch": 6.52, "learning_rate": 1.932469240161548e-05, "loss": 0.8425, "step": 7715, "task_loss": 0.9264959692955017 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7541339993476868, "epoch": 6.52, "learning_rate": 1.9319996243073167e-05, "loss": 0.6826, "step": 7716, "task_loss": 0.9786801934242249 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.3892976939678192, "epoch": 6.52, "learning_rate": 1.9315300084530854e-05, "loss": 0.6385, "step": 7717, "task_loss": 0.15411044657230377 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6396268606185913, "epoch": 6.52, "learning_rate": 1.9310603925988543e-05, "loss": 0.9173, "step": 7718, "task_loss": 0.6852754354476929 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.9400156140327454, "epoch": 6.52, "learning_rate": 1.930590776744623e-05, "loss": 0.8959, "step": 7719, "task_loss": 1.1729614734649658 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6031107902526855, "epoch": 6.53, "learning_rate": 1.930121160890392e-05, "loss": 0.6377, "step": 7720, "task_loss": 0.9545750617980957 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7786155939102173, "epoch": 6.53, "learning_rate": 1.9296515450361606e-05, "loss": 0.8014, "step": 7721, "task_loss": 0.841705322265625 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.2996140718460083, "epoch": 6.53, "learning_rate": 1.9291819291819292e-05, "loss": 1.2648, "step": 7722, "task_loss": 1.480513334274292 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5905527472496033, "epoch": 6.53, "learning_rate": 1.928712313327698e-05, "loss": 0.5655, "step": 7723, "task_loss": 0.2887874245643616 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5376182794570923, "epoch": 6.53, "learning_rate": 1.9282426974734668e-05, "loss": 0.7498, "step": 7724, "task_loss": 0.644962728023529 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5927246809005737, "epoch": 6.53, "learning_rate": 1.9277730816192354e-05, "loss": 0.7216, "step": 7725, "task_loss": 0.8949723839759827 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.9754860401153564, "epoch": 6.53, "learning_rate": 1.9273034657650044e-05, "loss": 0.6375, "step": 7726, "task_loss": 0.4873080253601074 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.8372031450271606, "epoch": 6.53, "learning_rate": 1.926833849910773e-05, "loss": 0.751, "step": 7727, "task_loss": 0.8517493605613708 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5863825678825378, "epoch": 6.53, "learning_rate": 1.926364234056542e-05, "loss": 0.6045, "step": 7728, "task_loss": 0.3812902867794037 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5398223400115967, "epoch": 6.53, "learning_rate": 1.9258946182023103e-05, "loss": 0.6573, "step": 7729, "task_loss": 1.1849589347839355 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6727845072746277, "epoch": 6.53, "learning_rate": 1.9254250023480793e-05, "loss": 0.6766, "step": 7730, "task_loss": 0.3228757083415985 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.8819069862365723, "epoch": 6.53, "learning_rate": 1.924955386493848e-05, "loss": 0.7165, "step": 7731, "task_loss": 0.5985199213027954 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4298640489578247, "epoch": 6.54, "learning_rate": 1.924485770639617e-05, "loss": 0.6732, "step": 7732, "task_loss": 0.815597414970398 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7423425912857056, "epoch": 6.54, "learning_rate": 1.924016154785386e-05, "loss": 0.7915, "step": 7733, "task_loss": 0.6121283769607544 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7003265023231506, "epoch": 6.54, "learning_rate": 1.9235465389311545e-05, "loss": 0.6296, "step": 7734, "task_loss": 0.31271976232528687 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5254868865013123, "epoch": 6.54, "learning_rate": 1.923076923076923e-05, "loss": 0.7349, "step": 7735, "task_loss": 0.4855116009712219 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.43674591183662415, "epoch": 6.54, "learning_rate": 1.9226073072226918e-05, "loss": 0.4792, "step": 7736, "task_loss": 0.9132821559906006 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7891735434532166, "epoch": 6.54, "learning_rate": 1.9221376913684607e-05, "loss": 0.6492, "step": 7737, "task_loss": 0.7477094531059265 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.2180988788604736, "epoch": 6.54, "learning_rate": 1.9216680755142294e-05, "loss": 0.7319, "step": 7738, "task_loss": 0.8285298943519592 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7939982414245605, "epoch": 6.54, "learning_rate": 1.9211984596599984e-05, "loss": 0.6477, "step": 7739, "task_loss": 0.7834553718566895 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6451586484909058, "epoch": 6.54, "learning_rate": 1.920728843805767e-05, "loss": 0.6704, "step": 7740, "task_loss": 0.8189336657524109 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.8612432479858398, "epoch": 6.54, "learning_rate": 1.9202592279515356e-05, "loss": 0.6677, "step": 7741, "task_loss": 1.3176567554473877 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.34695884585380554, "epoch": 6.54, "learning_rate": 1.9197896120973043e-05, "loss": 0.5649, "step": 7742, "task_loss": 0.2803002595901489 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.3164924085140228, "epoch": 6.54, "learning_rate": 1.9193199962430732e-05, "loss": 0.5076, "step": 7743, "task_loss": 0.2756063640117645 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.0373148918151855, "epoch": 6.55, "learning_rate": 1.918850380388842e-05, "loss": 0.7185, "step": 7744, "task_loss": 0.4706798195838928 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.391814261674881, "epoch": 6.55, "learning_rate": 1.918380764534611e-05, "loss": 0.7298, "step": 7745, "task_loss": 0.3161371946334839 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7492378950119019, "epoch": 6.55, "learning_rate": 1.9179111486803795e-05, "loss": 0.9686, "step": 7746, "task_loss": 0.33143195509910583 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.2096893787384033, "epoch": 6.55, "learning_rate": 1.9174415328261484e-05, "loss": 0.9531, "step": 7747, "task_loss": 1.294621229171753 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5983145833015442, "epoch": 6.55, "learning_rate": 1.916971916971917e-05, "loss": 0.8284, "step": 7748, "task_loss": 1.633280634880066 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7714205384254456, "epoch": 6.55, "learning_rate": 1.9165023011176857e-05, "loss": 0.82, "step": 7749, "task_loss": 0.4833499789237976 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5647680163383484, "epoch": 6.55, "learning_rate": 1.9160326852634547e-05, "loss": 0.63, "step": 7750, "task_loss": 1.170346975326538 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4931541085243225, "epoch": 6.55, "learning_rate": 1.9155630694092233e-05, "loss": 0.5985, "step": 7751, "task_loss": 0.4870527684688568 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.9296172857284546, "epoch": 6.55, "learning_rate": 1.9150934535549923e-05, "loss": 0.7688, "step": 7752, "task_loss": 1.282664179801941 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7493938207626343, "epoch": 6.55, "learning_rate": 1.914623837700761e-05, "loss": 0.7958, "step": 7753, "task_loss": 1.1350146532058716 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7067694664001465, "epoch": 6.55, "learning_rate": 1.9141542218465296e-05, "loss": 0.7399, "step": 7754, "task_loss": 0.8203844428062439 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.9400995969772339, "epoch": 6.56, "learning_rate": 1.9136846059922982e-05, "loss": 0.8157, "step": 7755, "task_loss": 0.8471134305000305 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7401766180992126, "epoch": 6.56, "learning_rate": 1.913214990138067e-05, "loss": 0.8162, "step": 7756, "task_loss": 1.1807582378387451 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4688177704811096, "epoch": 6.56, "learning_rate": 1.9127453742838358e-05, "loss": 0.793, "step": 7757, "task_loss": 0.921956479549408 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5760269165039062, "epoch": 6.56, "learning_rate": 1.9122757584296048e-05, "loss": 0.69, "step": 7758, "task_loss": 0.6832701563835144 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.45360803604125977, "epoch": 6.56, "learning_rate": 1.9118061425753734e-05, "loss": 0.6073, "step": 7759, "task_loss": 0.12004715204238892 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4780018925666809, "epoch": 6.56, "learning_rate": 1.9113365267211424e-05, "loss": 0.85, "step": 7760, "task_loss": 0.6116994619369507 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5920121073722839, "epoch": 6.56, "learning_rate": 1.9108669108669107e-05, "loss": 0.77, "step": 7761, "task_loss": 0.8503890633583069 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6518872976303101, "epoch": 6.56, "learning_rate": 1.9103972950126796e-05, "loss": 0.576, "step": 7762, "task_loss": 0.5870639681816101 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.8605935573577881, "epoch": 6.56, "learning_rate": 1.9099276791584486e-05, "loss": 0.7268, "step": 7763, "task_loss": 1.2809977531433105 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5789771676063538, "epoch": 6.56, "learning_rate": 1.9094580633042173e-05, "loss": 0.587, "step": 7764, "task_loss": 0.8626643419265747 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7379387021064758, "epoch": 6.56, "learning_rate": 1.9089884474499862e-05, "loss": 0.9264, "step": 7765, "task_loss": 1.3871058225631714 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.540118396282196, "epoch": 6.56, "learning_rate": 1.908518831595755e-05, "loss": 0.7965, "step": 7766, "task_loss": 0.28565290570259094 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.2509283423423767, "epoch": 6.57, "learning_rate": 1.9080492157415235e-05, "loss": 0.6232, "step": 7767, "task_loss": 0.2952711582183838 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.854260265827179, "epoch": 6.57, "learning_rate": 1.907579599887292e-05, "loss": 0.7189, "step": 7768, "task_loss": 0.3654499053955078 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.3097617030143738, "epoch": 6.57, "learning_rate": 1.907109984033061e-05, "loss": 0.4707, "step": 7769, "task_loss": 0.02215125411748886 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.827308177947998, "epoch": 6.57, "learning_rate": 1.9066403681788297e-05, "loss": 0.7255, "step": 7770, "task_loss": 0.5571097135543823 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.9165446162223816, "epoch": 6.57, "learning_rate": 1.9061707523245987e-05, "loss": 0.7839, "step": 7771, "task_loss": 0.8263823390007019 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5890278816223145, "epoch": 6.57, "learning_rate": 1.9057011364703673e-05, "loss": 0.583, "step": 7772, "task_loss": 0.9413571953773499 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.4701464176177979, "epoch": 6.57, "learning_rate": 1.905231520616136e-05, "loss": 0.952, "step": 7773, "task_loss": 1.3535890579223633 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5097281336784363, "epoch": 6.57, "learning_rate": 1.9047619047619046e-05, "loss": 0.7686, "step": 7774, "task_loss": 0.38886770606040955 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.8099279999732971, "epoch": 6.57, "learning_rate": 1.9042922889076736e-05, "loss": 0.6396, "step": 7775, "task_loss": 1.0725030899047852 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6936069130897522, "epoch": 6.57, "learning_rate": 1.9038226730534422e-05, "loss": 0.6865, "step": 7776, "task_loss": 0.539017915725708 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.8883570432662964, "epoch": 6.57, "learning_rate": 1.9033530571992112e-05, "loss": 0.649, "step": 7777, "task_loss": 1.0920653343200684 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4825749695301056, "epoch": 6.57, "learning_rate": 1.90288344134498e-05, "loss": 0.6692, "step": 7778, "task_loss": 2.15434193611145 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5092694759368896, "epoch": 6.58, "learning_rate": 1.9024138254907488e-05, "loss": 0.6297, "step": 7779, "task_loss": 0.6431339383125305 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.8136820197105408, "epoch": 6.58, "learning_rate": 1.9019442096365174e-05, "loss": 0.597, "step": 7780, "task_loss": 0.6277098059654236 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5907455086708069, "epoch": 6.58, "learning_rate": 1.901474593782286e-05, "loss": 0.7354, "step": 7781, "task_loss": 0.5234063267707825 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.46702224016189575, "epoch": 6.58, "learning_rate": 1.901004977928055e-05, "loss": 0.7372, "step": 7782, "task_loss": 0.35114461183547974 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6381715536117554, "epoch": 6.58, "learning_rate": 1.9005353620738237e-05, "loss": 0.5827, "step": 7783, "task_loss": 1.1956830024719238 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.8229279518127441, "epoch": 6.58, "learning_rate": 1.9000657462195926e-05, "loss": 0.899, "step": 7784, "task_loss": 1.060242772102356 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5737509727478027, "epoch": 6.58, "learning_rate": 1.8995961303653613e-05, "loss": 0.4824, "step": 7785, "task_loss": 0.5266300439834595 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6548047065734863, "epoch": 6.58, "learning_rate": 1.89912651451113e-05, "loss": 0.8374, "step": 7786, "task_loss": 0.404236763715744 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.0868611335754395, "epoch": 6.58, "learning_rate": 1.8986568986568985e-05, "loss": 0.7494, "step": 7787, "task_loss": 1.570975661277771 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.8569380640983582, "epoch": 6.58, "learning_rate": 1.8981872828026675e-05, "loss": 0.7416, "step": 7788, "task_loss": 1.113991618156433 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7101219892501831, "epoch": 6.58, "learning_rate": 1.897717666948436e-05, "loss": 0.5741, "step": 7789, "task_loss": 0.683138370513916 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.89030522108078, "epoch": 6.58, "learning_rate": 1.897248051094205e-05, "loss": 0.7974, "step": 7790, "task_loss": 0.7947936654090881 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6993842124938965, "epoch": 6.59, "learning_rate": 1.8967784352399738e-05, "loss": 0.7358, "step": 7791, "task_loss": 1.230448842048645 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.3188402652740479, "epoch": 6.59, "learning_rate": 1.8963088193857424e-05, "loss": 0.8977, "step": 7792, "task_loss": 1.454193115234375 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.057513952255249, "epoch": 6.59, "learning_rate": 1.895839203531511e-05, "loss": 0.6674, "step": 7793, "task_loss": 0.6343326568603516 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.806682825088501, "epoch": 6.59, "learning_rate": 1.89536958767728e-05, "loss": 0.6579, "step": 7794, "task_loss": 0.5346019268035889 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5299502611160278, "epoch": 6.59, "learning_rate": 1.894899971823049e-05, "loss": 0.7228, "step": 7795, "task_loss": 0.6528446674346924 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6716800928115845, "epoch": 6.59, "learning_rate": 1.8944303559688176e-05, "loss": 0.667, "step": 7796, "task_loss": 0.8172768950462341 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.49140918254852295, "epoch": 6.59, "learning_rate": 1.8939607401145866e-05, "loss": 0.5185, "step": 7797, "task_loss": 0.305518239736557 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6331618428230286, "epoch": 6.59, "learning_rate": 1.8934911242603552e-05, "loss": 0.6601, "step": 7798, "task_loss": 0.5063621401786804 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.3184168338775635, "epoch": 6.59, "learning_rate": 1.893021508406124e-05, "loss": 0.677, "step": 7799, "task_loss": 0.48692333698272705 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.8533059358596802, "epoch": 6.59, "learning_rate": 1.8925518925518925e-05, "loss": 0.7331, "step": 7800, "task_loss": 0.33693423867225647 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.836164116859436, "epoch": 6.59, "learning_rate": 1.8920822766976615e-05, "loss": 0.721, "step": 7801, "task_loss": 0.9489198923110962 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7165403962135315, "epoch": 6.59, "learning_rate": 1.89161266084343e-05, "loss": 0.8088, "step": 7802, "task_loss": 1.5076020956039429 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.8273138999938965, "epoch": 6.6, "learning_rate": 1.891143044989199e-05, "loss": 0.7999, "step": 7803, "task_loss": 1.6975314617156982 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7289086580276489, "epoch": 6.6, "learning_rate": 1.8906734291349677e-05, "loss": 0.7288, "step": 7804, "task_loss": 0.8381226062774658 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7704528570175171, "epoch": 6.6, "learning_rate": 1.8902038132807363e-05, "loss": 0.9323, "step": 7805, "task_loss": 1.7940747737884521 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.256895899772644, "epoch": 6.6, "learning_rate": 1.889734197426505e-05, "loss": 0.9111, "step": 7806, "task_loss": 1.6797608137130737 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7859238982200623, "epoch": 6.6, "learning_rate": 1.889264581572274e-05, "loss": 0.7906, "step": 7807, "task_loss": 1.5870091915130615 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.0073221921920776, "epoch": 6.6, "learning_rate": 1.8887949657180426e-05, "loss": 0.5878, "step": 7808, "task_loss": 0.6206783652305603 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6592059135437012, "epoch": 6.6, "learning_rate": 1.8883253498638115e-05, "loss": 0.5486, "step": 7809, "task_loss": 1.5339548587799072 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.2701926231384277, "epoch": 6.6, "learning_rate": 1.8878557340095805e-05, "loss": 0.856, "step": 7810, "task_loss": 0.8669659495353699 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7515615224838257, "epoch": 6.6, "learning_rate": 1.887386118155349e-05, "loss": 0.72, "step": 7811, "task_loss": 1.1863491535186768 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5042928457260132, "epoch": 6.6, "learning_rate": 1.8869165023011178e-05, "loss": 0.6659, "step": 7812, "task_loss": 0.691389799118042 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.8114071488380432, "epoch": 6.6, "learning_rate": 1.8864468864468864e-05, "loss": 0.7502, "step": 7813, "task_loss": 0.3721247911453247 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.38322579860687256, "epoch": 6.6, "learning_rate": 1.8859772705926554e-05, "loss": 0.6483, "step": 7814, "task_loss": 0.061326153576374054 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5933911204338074, "epoch": 6.61, "learning_rate": 1.885507654738424e-05, "loss": 0.7209, "step": 7815, "task_loss": 1.241695523262024 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5378559231758118, "epoch": 6.61, "learning_rate": 1.885038038884193e-05, "loss": 0.6551, "step": 7816, "task_loss": 0.23212647438049316 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.746422290802002, "epoch": 6.61, "learning_rate": 1.8845684230299616e-05, "loss": 0.646, "step": 7817, "task_loss": 1.8283711671829224 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.47464901208877563, "epoch": 6.61, "learning_rate": 1.8840988071757303e-05, "loss": 0.6529, "step": 7818, "task_loss": 0.46652907133102417 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.8120818138122559, "epoch": 6.61, "learning_rate": 1.883629191321499e-05, "loss": 0.6, "step": 7819, "task_loss": 0.4492277204990387 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6828403472900391, "epoch": 6.61, "learning_rate": 1.883159575467268e-05, "loss": 0.6923, "step": 7820, "task_loss": 0.9340840578079224 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7717525959014893, "epoch": 6.61, "learning_rate": 1.8826899596130365e-05, "loss": 0.7275, "step": 7821, "task_loss": 0.6896119713783264 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7674301266670227, "epoch": 6.61, "learning_rate": 1.8822203437588055e-05, "loss": 0.7601, "step": 7822, "task_loss": 1.0302557945251465 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6335337162017822, "epoch": 6.61, "learning_rate": 1.881750727904574e-05, "loss": 0.7603, "step": 7823, "task_loss": 0.5713998675346375 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.8188178539276123, "epoch": 6.61, "learning_rate": 1.8812811120503427e-05, "loss": 0.6082, "step": 7824, "task_loss": 1.0623761415481567 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.28127920627593994, "epoch": 6.61, "learning_rate": 1.8808114961961117e-05, "loss": 0.4432, "step": 7825, "task_loss": 0.21630476415157318 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6065192222595215, "epoch": 6.61, "learning_rate": 1.8803418803418804e-05, "loss": 0.6879, "step": 7826, "task_loss": 1.3084105253219604 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.9372395277023315, "epoch": 6.62, "learning_rate": 1.8798722644876493e-05, "loss": 0.8074, "step": 7827, "task_loss": 0.8304434418678284 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.8588280081748962, "epoch": 6.62, "learning_rate": 1.879402648633418e-05, "loss": 0.9756, "step": 7828, "task_loss": 1.5051504373550415 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.8606330156326294, "epoch": 6.62, "learning_rate": 1.878933032779187e-05, "loss": 0.8129, "step": 7829, "task_loss": 1.0700019598007202 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6594505310058594, "epoch": 6.62, "learning_rate": 1.8784634169249556e-05, "loss": 0.7928, "step": 7830, "task_loss": 0.47969910502433777 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7884535789489746, "epoch": 6.62, "learning_rate": 1.8779938010707242e-05, "loss": 0.6013, "step": 7831, "task_loss": 0.9770629405975342 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.2070974111557007, "epoch": 6.62, "learning_rate": 1.877524185216493e-05, "loss": 0.7962, "step": 7832, "task_loss": 0.6993933320045471 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.40967270731925964, "epoch": 6.62, "learning_rate": 1.8770545693622618e-05, "loss": 0.6595, "step": 7833, "task_loss": 0.7899876236915588 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7396615147590637, "epoch": 6.62, "learning_rate": 1.8765849535080304e-05, "loss": 0.954, "step": 7834, "task_loss": 0.5316251516342163 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.68337082862854, "epoch": 6.62, "learning_rate": 1.8761153376537994e-05, "loss": 0.7433, "step": 7835, "task_loss": 1.0057151317596436 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.43260130286216736, "epoch": 6.62, "learning_rate": 1.875645721799568e-05, "loss": 0.6786, "step": 7836, "task_loss": 0.7278441190719604 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.9949042797088623, "epoch": 6.62, "learning_rate": 1.8751761059453367e-05, "loss": 0.7617, "step": 7837, "task_loss": 0.6457927823066711 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6649297475814819, "epoch": 6.63, "learning_rate": 1.8747064900911053e-05, "loss": 0.6905, "step": 7838, "task_loss": 0.4780188500881195 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7206320762634277, "epoch": 6.63, "learning_rate": 1.8742368742368743e-05, "loss": 0.8024, "step": 7839, "task_loss": 1.5674070119857788 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.151001214981079, "epoch": 6.63, "learning_rate": 1.8737672583826433e-05, "loss": 0.7241, "step": 7840, "task_loss": 1.2793731689453125 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.22347110509872437, "epoch": 6.63, "learning_rate": 1.873297642528412e-05, "loss": 0.5819, "step": 7841, "task_loss": 0.0804326981306076 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.0909905433654785, "epoch": 6.63, "learning_rate": 1.872828026674181e-05, "loss": 0.8094, "step": 7842, "task_loss": 1.0655397176742554 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.725278377532959, "epoch": 6.63, "learning_rate": 1.8723584108199495e-05, "loss": 0.7924, "step": 7843, "task_loss": 0.9503967761993408 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.981561005115509, "epoch": 6.63, "learning_rate": 1.871888794965718e-05, "loss": 0.7637, "step": 7844, "task_loss": 1.4444925785064697 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6943938732147217, "epoch": 6.63, "learning_rate": 1.8714191791114868e-05, "loss": 0.8357, "step": 7845, "task_loss": 1.0910625457763672 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.886143684387207, "epoch": 6.63, "learning_rate": 1.8709495632572557e-05, "loss": 0.9499, "step": 7846, "task_loss": 2.3945388793945312 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.9058287143707275, "epoch": 6.63, "learning_rate": 1.8704799474030244e-05, "loss": 0.7592, "step": 7847, "task_loss": 0.6217558979988098 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6692575216293335, "epoch": 6.63, "learning_rate": 1.8700103315487934e-05, "loss": 0.6718, "step": 7848, "task_loss": 0.6496251225471497 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.695428729057312, "epoch": 6.63, "learning_rate": 1.869540715694562e-05, "loss": 0.7254, "step": 7849, "task_loss": 0.7545936703681946 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5767747759819031, "epoch": 6.64, "learning_rate": 1.8690710998403306e-05, "loss": 0.549, "step": 7850, "task_loss": 1.1683772802352905 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.473606824874878, "epoch": 6.64, "learning_rate": 1.8686014839860993e-05, "loss": 0.8785, "step": 7851, "task_loss": 0.6885291934013367 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7994762659072876, "epoch": 6.64, "learning_rate": 1.8681318681318682e-05, "loss": 0.7681, "step": 7852, "task_loss": 0.9728848338127136 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.3819310665130615, "epoch": 6.64, "learning_rate": 1.867662252277637e-05, "loss": 0.8053, "step": 7853, "task_loss": 1.0391877889633179 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.49070021510124207, "epoch": 6.64, "learning_rate": 1.867192636423406e-05, "loss": 0.7229, "step": 7854, "task_loss": 0.10621807724237442 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.667739748954773, "epoch": 6.64, "learning_rate": 1.8667230205691748e-05, "loss": 0.5962, "step": 7855, "task_loss": 0.7288939952850342 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7713451385498047, "epoch": 6.64, "learning_rate": 1.866253404714943e-05, "loss": 0.8561, "step": 7856, "task_loss": 1.4917329549789429 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.9241687059402466, "epoch": 6.64, "learning_rate": 1.865783788860712e-05, "loss": 0.6508, "step": 7857, "task_loss": 0.5608865022659302 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6861218810081482, "epoch": 6.64, "learning_rate": 1.8653141730064807e-05, "loss": 0.6652, "step": 7858, "task_loss": 0.2603943347930908 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.1871118545532227, "epoch": 6.64, "learning_rate": 1.8648445571522497e-05, "loss": 0.8852, "step": 7859, "task_loss": 1.111939787864685 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.410509318113327, "epoch": 6.64, "learning_rate": 1.8643749412980183e-05, "loss": 0.5672, "step": 7860, "task_loss": 0.538434624671936 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.3582940995693207, "epoch": 6.64, "learning_rate": 1.8639053254437873e-05, "loss": 0.6643, "step": 7861, "task_loss": 0.03490319475531578 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.9138627052307129, "epoch": 6.65, "learning_rate": 1.863435709589556e-05, "loss": 0.758, "step": 7862, "task_loss": 0.8516718745231628 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4916948676109314, "epoch": 6.65, "learning_rate": 1.8629660937353246e-05, "loss": 0.6038, "step": 7863, "task_loss": 0.909015953540802 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7165804505348206, "epoch": 6.65, "learning_rate": 1.8624964778810932e-05, "loss": 0.5611, "step": 7864, "task_loss": 0.69620680809021 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.0723464488983154, "epoch": 6.65, "learning_rate": 1.862026862026862e-05, "loss": 0.7505, "step": 7865, "task_loss": 1.4382755756378174 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.3903179168701172, "epoch": 6.65, "learning_rate": 1.8615572461726308e-05, "loss": 0.6346, "step": 7866, "task_loss": 0.2761421203613281 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7648067474365234, "epoch": 6.65, "learning_rate": 1.8610876303183998e-05, "loss": 0.6708, "step": 7867, "task_loss": 1.2758221626281738 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6692013740539551, "epoch": 6.65, "learning_rate": 1.8606180144641684e-05, "loss": 0.7081, "step": 7868, "task_loss": 0.6584583520889282 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.3371739685535431, "epoch": 6.65, "learning_rate": 1.860148398609937e-05, "loss": 0.6335, "step": 7869, "task_loss": 0.6395004987716675 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4584331512451172, "epoch": 6.65, "learning_rate": 1.8596787827557057e-05, "loss": 0.5971, "step": 7870, "task_loss": 1.3012323379516602 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.9976335167884827, "epoch": 6.65, "learning_rate": 1.8592091669014746e-05, "loss": 0.8223, "step": 7871, "task_loss": 1.2253007888793945 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4941475987434387, "epoch": 6.65, "learning_rate": 1.8587395510472436e-05, "loss": 0.7129, "step": 7872, "task_loss": 1.3460168838500977 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.8650755286216736, "epoch": 6.65, "learning_rate": 1.8582699351930122e-05, "loss": 0.6712, "step": 7873, "task_loss": 1.1510897874832153 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.9519252777099609, "epoch": 6.66, "learning_rate": 1.8578003193387812e-05, "loss": 0.7787, "step": 7874, "task_loss": 0.6160178184509277 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.9744563102722168, "epoch": 6.66, "learning_rate": 1.8573307034845495e-05, "loss": 0.8978, "step": 7875, "task_loss": 0.7912254929542542 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5862891674041748, "epoch": 6.66, "learning_rate": 1.8568610876303185e-05, "loss": 0.6286, "step": 7876, "task_loss": 0.17454218864440918 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.847195565700531, "epoch": 6.66, "learning_rate": 1.856391471776087e-05, "loss": 0.8001, "step": 7877, "task_loss": 1.1676759719848633 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.19663405418396, "epoch": 6.66, "learning_rate": 1.855921855921856e-05, "loss": 0.8, "step": 7878, "task_loss": 1.4962078332901 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.9336768388748169, "epoch": 6.66, "learning_rate": 1.8554522400676247e-05, "loss": 0.6937, "step": 7879, "task_loss": 1.0022307634353638 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7022480964660645, "epoch": 6.66, "learning_rate": 1.8549826242133937e-05, "loss": 0.6243, "step": 7880, "task_loss": 0.301519513130188 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7668818831443787, "epoch": 6.66, "learning_rate": 1.8545130083591623e-05, "loss": 0.7655, "step": 7881, "task_loss": 0.9754584431648254 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6367032527923584, "epoch": 6.66, "learning_rate": 1.854043392504931e-05, "loss": 0.6221, "step": 7882, "task_loss": 0.3960645794868469 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5827299356460571, "epoch": 6.66, "learning_rate": 1.8535737766506996e-05, "loss": 0.6088, "step": 7883, "task_loss": 0.381015419960022 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.1089345216751099, "epoch": 6.66, "learning_rate": 1.8531041607964686e-05, "loss": 0.8348, "step": 7884, "task_loss": 0.7361460328102112 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6794453859329224, "epoch": 6.66, "learning_rate": 1.8526345449422372e-05, "loss": 0.9032, "step": 7885, "task_loss": 1.330956220626831 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7649685144424438, "epoch": 6.67, "learning_rate": 1.8521649290880062e-05, "loss": 0.6831, "step": 7886, "task_loss": 1.0181002616882324 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4819204807281494, "epoch": 6.67, "learning_rate": 1.8516953132337748e-05, "loss": 0.766, "step": 7887, "task_loss": 1.3499748706817627 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5072662830352783, "epoch": 6.67, "learning_rate": 1.8512256973795435e-05, "loss": 0.676, "step": 7888, "task_loss": 0.8055269122123718 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.9185647368431091, "epoch": 6.67, "learning_rate": 1.8507560815253124e-05, "loss": 0.8706, "step": 7889, "task_loss": 1.2536611557006836 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.46412649750709534, "epoch": 6.67, "learning_rate": 1.850286465671081e-05, "loss": 0.5188, "step": 7890, "task_loss": 1.0379188060760498 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.36142587661743164, "epoch": 6.67, "learning_rate": 1.84981684981685e-05, "loss": 0.4808, "step": 7891, "task_loss": 0.23987053334712982 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.8084214329719543, "epoch": 6.67, "learning_rate": 1.8493472339626187e-05, "loss": 0.6773, "step": 7892, "task_loss": 0.5548166632652283 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4335891604423523, "epoch": 6.67, "learning_rate": 1.8488776181083876e-05, "loss": 0.6865, "step": 7893, "task_loss": 1.1114609241485596 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5552604794502258, "epoch": 6.67, "learning_rate": 1.8484080022541563e-05, "loss": 0.7126, "step": 7894, "task_loss": 1.2031651735305786 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6247009038925171, "epoch": 6.67, "learning_rate": 1.847938386399925e-05, "loss": 0.5654, "step": 7895, "task_loss": 0.7995830178260803 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.33611488342285156, "epoch": 6.67, "learning_rate": 1.8474687705456935e-05, "loss": 0.6386, "step": 7896, "task_loss": 0.6066705584526062 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.009952187538147, "epoch": 6.67, "learning_rate": 1.8469991546914625e-05, "loss": 0.9248, "step": 7897, "task_loss": 1.1409581899642944 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.8554931282997131, "epoch": 6.68, "learning_rate": 1.846529538837231e-05, "loss": 0.7365, "step": 7898, "task_loss": 0.7813534736633301 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6962049007415771, "epoch": 6.68, "learning_rate": 1.846059922983e-05, "loss": 0.7716, "step": 7899, "task_loss": 1.0813212394714355 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.3811628818511963, "epoch": 6.68, "learning_rate": 1.8455903071287688e-05, "loss": 0.5666, "step": 7900, "task_loss": 0.3818693459033966 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.8189781904220581, "epoch": 6.68, "learning_rate": 1.8451206912745374e-05, "loss": 0.6425, "step": 7901, "task_loss": 0.6894546151161194 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.8266512751579285, "epoch": 6.68, "learning_rate": 1.8446510754203064e-05, "loss": 0.6748, "step": 7902, "task_loss": 0.8933721780776978 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.3638013005256653, "epoch": 6.68, "learning_rate": 1.844181459566075e-05, "loss": 0.5379, "step": 7903, "task_loss": 0.7172104120254517 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7087752819061279, "epoch": 6.68, "learning_rate": 1.843711843711844e-05, "loss": 0.9135, "step": 7904, "task_loss": 1.932684063911438 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.48375391960144043, "epoch": 6.68, "learning_rate": 1.8432422278576126e-05, "loss": 0.7564, "step": 7905, "task_loss": 1.0751687288284302 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5894186496734619, "epoch": 6.68, "learning_rate": 1.8427726120033816e-05, "loss": 0.8414, "step": 7906, "task_loss": 0.8775391578674316 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.8144789934158325, "epoch": 6.68, "learning_rate": 1.84230299614915e-05, "loss": 0.5902, "step": 7907, "task_loss": 0.7461346983909607 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.0533013343811035, "epoch": 6.68, "learning_rate": 1.841833380294919e-05, "loss": 0.6641, "step": 7908, "task_loss": 0.40353822708129883 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6165851354598999, "epoch": 6.69, "learning_rate": 1.8413637644406875e-05, "loss": 0.7678, "step": 7909, "task_loss": 0.6708202958106995 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6888418793678284, "epoch": 6.69, "learning_rate": 1.8408941485864564e-05, "loss": 0.6151, "step": 7910, "task_loss": 0.9505549669265747 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4211847484111786, "epoch": 6.69, "learning_rate": 1.840424532732225e-05, "loss": 0.5525, "step": 7911, "task_loss": 0.32170945405960083 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6918339133262634, "epoch": 6.69, "learning_rate": 1.839954916877994e-05, "loss": 0.6341, "step": 7912, "task_loss": 0.8993133902549744 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.8506077527999878, "epoch": 6.69, "learning_rate": 1.8394853010237627e-05, "loss": 0.6666, "step": 7913, "task_loss": 0.5703252553939819 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6750425100326538, "epoch": 6.69, "learning_rate": 1.8390156851695313e-05, "loss": 0.8671, "step": 7914, "task_loss": 0.9089783430099487 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.574044942855835, "epoch": 6.69, "learning_rate": 1.8385460693153e-05, "loss": 0.5112, "step": 7915, "task_loss": 1.2618130445480347 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5041455030441284, "epoch": 6.69, "learning_rate": 1.838076453461069e-05, "loss": 0.6554, "step": 7916, "task_loss": 0.8494638204574585 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7480868697166443, "epoch": 6.69, "learning_rate": 1.837606837606838e-05, "loss": 0.848, "step": 7917, "task_loss": 0.3841010630130768 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.37854355573654175, "epoch": 6.69, "learning_rate": 1.8371372217526065e-05, "loss": 0.7072, "step": 7918, "task_loss": 0.0911397933959961 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.8841831684112549, "epoch": 6.69, "learning_rate": 1.8366676058983752e-05, "loss": 0.689, "step": 7919, "task_loss": 0.7403742671012878 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7463845014572144, "epoch": 6.69, "learning_rate": 1.8361979900441438e-05, "loss": 0.7868, "step": 7920, "task_loss": 0.7863142490386963 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.1865637302398682, "epoch": 6.7, "learning_rate": 1.8357283741899128e-05, "loss": 0.8119, "step": 7921, "task_loss": 1.0542932748794556 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.8660508990287781, "epoch": 6.7, "learning_rate": 1.8352587583356814e-05, "loss": 0.7222, "step": 7922, "task_loss": 0.2783169150352478 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7154989838600159, "epoch": 6.7, "learning_rate": 1.8347891424814504e-05, "loss": 0.6366, "step": 7923, "task_loss": 0.7387536764144897 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.8136405944824219, "epoch": 6.7, "learning_rate": 1.834319526627219e-05, "loss": 0.7286, "step": 7924, "task_loss": 0.4507836699485779 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6038713455200195, "epoch": 6.7, "learning_rate": 1.833849910772988e-05, "loss": 0.5986, "step": 7925, "task_loss": 0.5910147428512573 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.534325122833252, "epoch": 6.7, "learning_rate": 1.8333802949187566e-05, "loss": 0.6766, "step": 7926, "task_loss": 0.6559394598007202 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7273287773132324, "epoch": 6.7, "learning_rate": 1.8329106790645253e-05, "loss": 0.5199, "step": 7927, "task_loss": 0.5171031951904297 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5430275797843933, "epoch": 6.7, "learning_rate": 1.832441063210294e-05, "loss": 0.6632, "step": 7928, "task_loss": 0.7681781649589539 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.9331591129302979, "epoch": 6.7, "learning_rate": 1.831971447356063e-05, "loss": 0.7638, "step": 7929, "task_loss": 0.8586137294769287 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.574021577835083, "epoch": 6.7, "learning_rate": 1.8315018315018315e-05, "loss": 1.0647, "step": 7930, "task_loss": 0.9498838186264038 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.48796409368515015, "epoch": 6.7, "learning_rate": 1.8310322156476005e-05, "loss": 0.6966, "step": 7931, "task_loss": 0.5110630393028259 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6608863472938538, "epoch": 6.7, "learning_rate": 1.830562599793369e-05, "loss": 0.8187, "step": 7932, "task_loss": 1.179255723953247 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5754683017730713, "epoch": 6.71, "learning_rate": 1.8300929839391377e-05, "loss": 0.6756, "step": 7933, "task_loss": 0.7088960409164429 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.634117603302002, "epoch": 6.71, "learning_rate": 1.8296233680849067e-05, "loss": 0.7904, "step": 7934, "task_loss": 0.4011755883693695 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6617614030838013, "epoch": 6.71, "learning_rate": 1.8291537522306753e-05, "loss": 0.7491, "step": 7935, "task_loss": 1.725034475326538 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7297960519790649, "epoch": 6.71, "learning_rate": 1.8286841363764443e-05, "loss": 0.6529, "step": 7936, "task_loss": 1.0573828220367432 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.9059305787086487, "epoch": 6.71, "learning_rate": 1.828214520522213e-05, "loss": 0.8028, "step": 7937, "task_loss": 1.1649330854415894 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.659383237361908, "epoch": 6.71, "learning_rate": 1.827744904667982e-05, "loss": 0.544, "step": 7938, "task_loss": 0.113300621509552 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.0751678943634033, "epoch": 6.71, "learning_rate": 1.8272752888137502e-05, "loss": 0.5793, "step": 7939, "task_loss": 1.0219407081604004 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.1095887422561646, "epoch": 6.71, "learning_rate": 1.8268056729595192e-05, "loss": 0.8892, "step": 7940, "task_loss": 1.1047041416168213 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7027661204338074, "epoch": 6.71, "learning_rate": 1.8263360571052878e-05, "loss": 0.8473, "step": 7941, "task_loss": 1.01369047164917 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.3772653043270111, "epoch": 6.71, "learning_rate": 1.8258664412510568e-05, "loss": 0.405, "step": 7942, "task_loss": 0.8380016088485718 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6938070058822632, "epoch": 6.71, "learning_rate": 1.8253968253968254e-05, "loss": 0.5531, "step": 7943, "task_loss": 0.5890017151832581 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.39807236194610596, "epoch": 6.71, "learning_rate": 1.8249272095425944e-05, "loss": 0.8595, "step": 7944, "task_loss": 0.29542940855026245 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6868453025817871, "epoch": 6.72, "learning_rate": 1.824457593688363e-05, "loss": 0.8595, "step": 7945, "task_loss": 0.6749566197395325 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4321601688861847, "epoch": 6.72, "learning_rate": 1.8239879778341317e-05, "loss": 0.6605, "step": 7946, "task_loss": 0.5932649374008179 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.47573405504226685, "epoch": 6.72, "learning_rate": 1.8235183619799003e-05, "loss": 0.6114, "step": 7947, "task_loss": 0.36540669202804565 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.179674744606018, "epoch": 6.72, "learning_rate": 1.8230487461256693e-05, "loss": 0.6964, "step": 7948, "task_loss": 1.3381692171096802 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.27693408727645874, "epoch": 6.72, "learning_rate": 1.8225791302714383e-05, "loss": 0.4121, "step": 7949, "task_loss": 0.34034907817840576 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.45274123549461365, "epoch": 6.72, "learning_rate": 1.822109514417207e-05, "loss": 0.6604, "step": 7950, "task_loss": 0.7094851136207581 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.86525559425354, "epoch": 6.72, "learning_rate": 1.8216398985629755e-05, "loss": 0.8097, "step": 7951, "task_loss": 1.4980319738388062 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7549800276756287, "epoch": 6.72, "learning_rate": 1.821170282708744e-05, "loss": 0.598, "step": 7952, "task_loss": 0.9671727418899536 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7765780687332153, "epoch": 6.72, "learning_rate": 1.820700666854513e-05, "loss": 0.7074, "step": 7953, "task_loss": 0.9649078845977783 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.637029230594635, "epoch": 6.72, "learning_rate": 1.8202310510002818e-05, "loss": 0.6844, "step": 7954, "task_loss": 0.25865480303764343 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.8664615154266357, "epoch": 6.72, "learning_rate": 1.8197614351460507e-05, "loss": 0.6513, "step": 7955, "task_loss": 0.8578440546989441 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.41346997022628784, "epoch": 6.72, "learning_rate": 1.8192918192918194e-05, "loss": 0.5843, "step": 7956, "task_loss": 0.6374626755714417 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.27111926674842834, "epoch": 6.73, "learning_rate": 1.8188222034375883e-05, "loss": 0.8302, "step": 7957, "task_loss": 0.6497591733932495 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6577046513557434, "epoch": 6.73, "learning_rate": 1.8183525875833566e-05, "loss": 0.6675, "step": 7958, "task_loss": 0.8724526762962341 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6831278800964355, "epoch": 6.73, "learning_rate": 1.8178829717291256e-05, "loss": 0.592, "step": 7959, "task_loss": 1.1061406135559082 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4870046377182007, "epoch": 6.73, "learning_rate": 1.8174133558748942e-05, "loss": 0.482, "step": 7960, "task_loss": 0.828632652759552 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.694338321685791, "epoch": 6.73, "learning_rate": 1.8169437400206632e-05, "loss": 0.7124, "step": 7961, "task_loss": 0.6959808468818665 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7595100998878479, "epoch": 6.73, "learning_rate": 1.816474124166432e-05, "loss": 0.7484, "step": 7962, "task_loss": 0.2699923515319824 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6249562501907349, "epoch": 6.73, "learning_rate": 1.8160045083122008e-05, "loss": 0.4847, "step": 7963, "task_loss": 0.25175535678863525 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.0113379955291748, "epoch": 6.73, "learning_rate": 1.8155348924579695e-05, "loss": 0.664, "step": 7964, "task_loss": 0.7499567270278931 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6708813905715942, "epoch": 6.73, "learning_rate": 1.815065276603738e-05, "loss": 0.6245, "step": 7965, "task_loss": 0.8841532468795776 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.45417991280555725, "epoch": 6.73, "learning_rate": 1.814595660749507e-05, "loss": 0.794, "step": 7966, "task_loss": 0.6017684936523438 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.9415045380592346, "epoch": 6.73, "learning_rate": 1.8141260448952757e-05, "loss": 0.6708, "step": 7967, "task_loss": 0.810107409954071 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5682117938995361, "epoch": 6.73, "learning_rate": 1.8136564290410447e-05, "loss": 0.7, "step": 7968, "task_loss": 0.8896453976631165 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7057149410247803, "epoch": 6.74, "learning_rate": 1.8131868131868133e-05, "loss": 0.5911, "step": 7969, "task_loss": 1.0541017055511475 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.3720572590827942, "epoch": 6.74, "learning_rate": 1.812717197332582e-05, "loss": 0.5753, "step": 7970, "task_loss": 0.5229494571685791 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.26686209440231323, "epoch": 6.74, "learning_rate": 1.8122475814783506e-05, "loss": 0.5754, "step": 7971, "task_loss": 0.39854317903518677 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7378036975860596, "epoch": 6.74, "learning_rate": 1.8117779656241195e-05, "loss": 0.7868, "step": 7972, "task_loss": 0.35473713278770447 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7486150860786438, "epoch": 6.74, "learning_rate": 1.8113083497698882e-05, "loss": 0.6998, "step": 7973, "task_loss": 1.037705421447754 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7320632934570312, "epoch": 6.74, "learning_rate": 1.810838733915657e-05, "loss": 0.789, "step": 7974, "task_loss": 0.46460893750190735 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6991386413574219, "epoch": 6.74, "learning_rate": 1.8103691180614258e-05, "loss": 0.7022, "step": 7975, "task_loss": 0.2743189036846161 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7205047607421875, "epoch": 6.74, "learning_rate": 1.8098995022071948e-05, "loss": 0.6746, "step": 7976, "task_loss": 0.9362558722496033 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6944296360015869, "epoch": 6.74, "learning_rate": 1.8094298863529634e-05, "loss": 0.6601, "step": 7977, "task_loss": 0.454405814409256 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5766761302947998, "epoch": 6.74, "learning_rate": 1.808960270498732e-05, "loss": 0.7658, "step": 7978, "task_loss": 0.8851684927940369 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.096362829208374, "epoch": 6.74, "learning_rate": 1.808490654644501e-05, "loss": 0.8801, "step": 7979, "task_loss": 0.5858886241912842 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.8550338745117188, "epoch": 6.75, "learning_rate": 1.8080210387902696e-05, "loss": 0.743, "step": 7980, "task_loss": 0.9079235196113586 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.2198041677474976, "epoch": 6.75, "learning_rate": 1.8075514229360386e-05, "loss": 0.8365, "step": 7981, "task_loss": 1.197420358657837 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6381044387817383, "epoch": 6.75, "learning_rate": 1.8070818070818072e-05, "loss": 0.702, "step": 7982, "task_loss": 0.3985850512981415 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6946704387664795, "epoch": 6.75, "learning_rate": 1.806612191227576e-05, "loss": 0.9497, "step": 7983, "task_loss": 0.697201669216156 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.23690593242645264, "epoch": 6.75, "learning_rate": 1.8061425753733445e-05, "loss": 0.707, "step": 7984, "task_loss": 0.27074435353279114 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5232446193695068, "epoch": 6.75, "learning_rate": 1.8056729595191135e-05, "loss": 0.6158, "step": 7985, "task_loss": 0.5616576075553894 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.515991747379303, "epoch": 6.75, "learning_rate": 1.805203343664882e-05, "loss": 0.803, "step": 7986, "task_loss": 1.1551685333251953 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.8239456415176392, "epoch": 6.75, "learning_rate": 1.804733727810651e-05, "loss": 0.8247, "step": 7987, "task_loss": 0.693233847618103 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.751591682434082, "epoch": 6.75, "learning_rate": 1.8042641119564197e-05, "loss": 0.6679, "step": 7988, "task_loss": 0.9313406348228455 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.038351058959961, "epoch": 6.75, "learning_rate": 1.8037944961021887e-05, "loss": 0.7906, "step": 7989, "task_loss": 0.8524459600448608 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.1990362405776978, "epoch": 6.75, "learning_rate": 1.803324880247957e-05, "loss": 0.9134, "step": 7990, "task_loss": 1.4911595582962036 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.8936008214950562, "epoch": 6.75, "learning_rate": 1.802855264393726e-05, "loss": 0.8676, "step": 7991, "task_loss": 0.42948809266090393 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.8148795366287231, "epoch": 6.76, "learning_rate": 1.8023856485394946e-05, "loss": 0.7236, "step": 7992, "task_loss": 0.7880675792694092 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7130094766616821, "epoch": 6.76, "learning_rate": 1.8019160326852636e-05, "loss": 0.6065, "step": 7993, "task_loss": 0.8865887522697449 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4009692668914795, "epoch": 6.76, "learning_rate": 1.8014464168310325e-05, "loss": 0.5261, "step": 7994, "task_loss": 0.19225730001926422 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7292789816856384, "epoch": 6.76, "learning_rate": 1.8009768009768012e-05, "loss": 0.7411, "step": 7995, "task_loss": 1.9855096340179443 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.632171094417572, "epoch": 6.76, "learning_rate": 1.8005071851225698e-05, "loss": 0.6413, "step": 7996, "task_loss": 0.725261926651001 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7210355997085571, "epoch": 6.76, "learning_rate": 1.8000375692683384e-05, "loss": 0.6499, "step": 7997, "task_loss": 0.7822661995887756 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.75420081615448, "epoch": 6.76, "learning_rate": 1.7995679534141074e-05, "loss": 0.6456, "step": 7998, "task_loss": 1.437920331954956 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.9440190196037292, "epoch": 6.76, "learning_rate": 1.799098337559876e-05, "loss": 0.7851, "step": 7999, "task_loss": 0.5991203188896179 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7499675750732422, "epoch": 6.76, "learning_rate": 1.798628721705645e-05, "loss": 0.7319, "step": 8000, "task_loss": 0.5333141684532166 }, { "epoch": 6.76, "eval_accuracy": 0.8933069306930693, "eval_loss": 0.4493527114391327, "eval_runtime": 225.2937, "eval_samples_per_second": 112.076, "eval_steps_per_second": 0.879, "step": 8000 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5895742177963257, "epoch": 6.76, "learning_rate": 1.7981591058514137e-05, "loss": 0.8241, "step": 8001, "task_loss": 1.0977030992507935 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.0360515117645264, "epoch": 6.76, "learning_rate": 1.7976894899971823e-05, "loss": 0.6784, "step": 8002, "task_loss": 0.7299138903617859 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.370789647102356, "epoch": 6.76, "learning_rate": 1.797219874142951e-05, "loss": 0.7494, "step": 8003, "task_loss": 1.210050344467163 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4126530587673187, "epoch": 6.77, "learning_rate": 1.79675025828872e-05, "loss": 0.7112, "step": 8004, "task_loss": 0.6391266584396362 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.8201828002929688, "epoch": 6.77, "learning_rate": 1.7962806424344885e-05, "loss": 1.1058, "step": 8005, "task_loss": 1.5582082271575928 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5877934694290161, "epoch": 6.77, "learning_rate": 1.7958110265802575e-05, "loss": 0.8139, "step": 8006, "task_loss": 1.2045164108276367 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5264323949813843, "epoch": 6.77, "learning_rate": 1.795341410726026e-05, "loss": 0.6626, "step": 8007, "task_loss": 1.127972960472107 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.1265079975128174, "epoch": 6.77, "learning_rate": 1.794871794871795e-05, "loss": 0.71, "step": 8008, "task_loss": 0.4674353301525116 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6414729356765747, "epoch": 6.77, "learning_rate": 1.7944021790175637e-05, "loss": 0.7546, "step": 8009, "task_loss": 0.7259740829467773 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4719139039516449, "epoch": 6.77, "learning_rate": 1.7939325631633324e-05, "loss": 0.5668, "step": 8010, "task_loss": 0.326391339302063 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5669357776641846, "epoch": 6.77, "learning_rate": 1.7934629473091014e-05, "loss": 0.7682, "step": 8011, "task_loss": 0.5344647169113159 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4654301106929779, "epoch": 6.77, "learning_rate": 1.79299333145487e-05, "loss": 0.6913, "step": 8012, "task_loss": 0.8966190218925476 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5026566982269287, "epoch": 6.77, "learning_rate": 1.792523715600639e-05, "loss": 0.7439, "step": 8013, "task_loss": 0.5133823752403259 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.45161157846450806, "epoch": 6.77, "learning_rate": 1.7920540997464076e-05, "loss": 0.4881, "step": 8014, "task_loss": 1.409341812133789 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6472766399383545, "epoch": 6.77, "learning_rate": 1.7915844838921762e-05, "loss": 0.6912, "step": 8015, "task_loss": 0.7846285104751587 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.8106926679611206, "epoch": 6.78, "learning_rate": 1.791114868037945e-05, "loss": 0.7025, "step": 8016, "task_loss": 1.1543165445327759 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6456445455551147, "epoch": 6.78, "learning_rate": 1.790645252183714e-05, "loss": 0.8015, "step": 8017, "task_loss": 0.7076823115348816 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.030876874923706, "epoch": 6.78, "learning_rate": 1.7901756363294825e-05, "loss": 0.7952, "step": 8018, "task_loss": 2.3905186653137207 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.3849503695964813, "epoch": 6.78, "learning_rate": 1.7897060204752514e-05, "loss": 0.7707, "step": 8019, "task_loss": 0.22061610221862793 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.8599023818969727, "epoch": 6.78, "learning_rate": 1.78923640462102e-05, "loss": 0.6732, "step": 8020, "task_loss": 0.8688079118728638 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.8625783920288086, "epoch": 6.78, "learning_rate": 1.788766788766789e-05, "loss": 0.8248, "step": 8021, "task_loss": 0.9139476418495178 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.9382263422012329, "epoch": 6.78, "learning_rate": 1.7882971729125573e-05, "loss": 0.8145, "step": 8022, "task_loss": 1.362126350402832 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.34017205238342285, "epoch": 6.78, "learning_rate": 1.7878275570583263e-05, "loss": 0.6446, "step": 8023, "task_loss": 0.1698073148727417 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6742199659347534, "epoch": 6.78, "learning_rate": 1.787357941204095e-05, "loss": 0.6398, "step": 8024, "task_loss": 0.8295663595199585 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.3501206636428833, "epoch": 6.78, "learning_rate": 1.786888325349864e-05, "loss": 0.5796, "step": 8025, "task_loss": 0.5304601192474365 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7201451063156128, "epoch": 6.78, "learning_rate": 1.786418709495633e-05, "loss": 0.6694, "step": 8026, "task_loss": 0.7510545253753662 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5976323485374451, "epoch": 6.78, "learning_rate": 1.7859490936414015e-05, "loss": 0.6548, "step": 8027, "task_loss": 1.1790045499801636 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4399861693382263, "epoch": 6.79, "learning_rate": 1.78547947778717e-05, "loss": 0.6176, "step": 8028, "task_loss": 0.5377351641654968 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.8051717281341553, "epoch": 6.79, "learning_rate": 1.7850098619329388e-05, "loss": 0.7058, "step": 8029, "task_loss": 1.3748936653137207 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.46252351999282837, "epoch": 6.79, "learning_rate": 1.7845402460787078e-05, "loss": 0.7076, "step": 8030, "task_loss": 0.8910819888114929 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.2647662162780762, "epoch": 6.79, "learning_rate": 1.7840706302244764e-05, "loss": 0.8755, "step": 8031, "task_loss": 1.759522557258606 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5914826989173889, "epoch": 6.79, "learning_rate": 1.7836010143702454e-05, "loss": 0.7055, "step": 8032, "task_loss": 0.5943803787231445 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7254488468170166, "epoch": 6.79, "learning_rate": 1.783131398516014e-05, "loss": 0.6026, "step": 8033, "task_loss": 0.7513352632522583 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7732395529747009, "epoch": 6.79, "learning_rate": 1.7826617826617826e-05, "loss": 0.8287, "step": 8034, "task_loss": 0.6602270603179932 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5491492748260498, "epoch": 6.79, "learning_rate": 1.7821921668075513e-05, "loss": 0.574, "step": 8035, "task_loss": 0.6980670690536499 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6102688312530518, "epoch": 6.79, "learning_rate": 1.7817225509533203e-05, "loss": 0.7835, "step": 8036, "task_loss": 0.3817468583583832 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5911701917648315, "epoch": 6.79, "learning_rate": 1.781252935099089e-05, "loss": 0.6996, "step": 8037, "task_loss": 0.412977933883667 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.8916078805923462, "epoch": 6.79, "learning_rate": 1.780783319244858e-05, "loss": 0.8117, "step": 8038, "task_loss": 1.0715104341506958 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5727194547653198, "epoch": 6.79, "learning_rate": 1.7803137033906265e-05, "loss": 0.5881, "step": 8039, "task_loss": 0.7758819460868835 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4205164909362793, "epoch": 6.8, "learning_rate": 1.7798440875363955e-05, "loss": 0.4401, "step": 8040, "task_loss": 0.3613310754299164 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.768086314201355, "epoch": 6.8, "learning_rate": 1.779374471682164e-05, "loss": 0.8839, "step": 8041, "task_loss": 0.602063000202179 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5803719758987427, "epoch": 6.8, "learning_rate": 1.7789048558279327e-05, "loss": 0.7653, "step": 8042, "task_loss": 0.3529641032218933 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.668161153793335, "epoch": 6.8, "learning_rate": 1.7784352399737017e-05, "loss": 0.7048, "step": 8043, "task_loss": 0.22776482999324799 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.19819016754627228, "epoch": 6.8, "learning_rate": 1.7779656241194703e-05, "loss": 0.6453, "step": 8044, "task_loss": 0.011714087799191475 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.2153489589691162, "epoch": 6.8, "learning_rate": 1.7774960082652393e-05, "loss": 0.7828, "step": 8045, "task_loss": 1.0043518543243408 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.9566121101379395, "epoch": 6.8, "learning_rate": 1.777026392411008e-05, "loss": 0.8292, "step": 8046, "task_loss": 0.9423202276229858 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5960302352905273, "epoch": 6.8, "learning_rate": 1.7765567765567766e-05, "loss": 0.5172, "step": 8047, "task_loss": 0.40406519174575806 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.629338800907135, "epoch": 6.8, "learning_rate": 1.7760871607025452e-05, "loss": 0.8247, "step": 8048, "task_loss": 0.5215269327163696 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.597755491733551, "epoch": 6.8, "learning_rate": 1.7756175448483142e-05, "loss": 0.7298, "step": 8049, "task_loss": 0.49606993794441223 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.40288883447647095, "epoch": 6.8, "learning_rate": 1.7751479289940828e-05, "loss": 0.6285, "step": 8050, "task_loss": 0.9734256267547607 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5448126792907715, "epoch": 6.81, "learning_rate": 1.7746783131398518e-05, "loss": 0.4602, "step": 8051, "task_loss": 0.40933969616889954 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6007298231124878, "epoch": 6.81, "learning_rate": 1.7742086972856204e-05, "loss": 0.7772, "step": 8052, "task_loss": 0.45853662490844727 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.8129710555076599, "epoch": 6.81, "learning_rate": 1.773739081431389e-05, "loss": 0.7322, "step": 8053, "task_loss": 0.39477023482322693 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6558352708816528, "epoch": 6.81, "learning_rate": 1.7732694655771577e-05, "loss": 0.6112, "step": 8054, "task_loss": 0.5795919895172119 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.8077524900436401, "epoch": 6.81, "learning_rate": 1.7727998497229267e-05, "loss": 0.7334, "step": 8055, "task_loss": 1.1315386295318604 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4297342896461487, "epoch": 6.81, "learning_rate": 1.7723302338686956e-05, "loss": 0.6482, "step": 8056, "task_loss": 0.5464257597923279 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7266287207603455, "epoch": 6.81, "learning_rate": 1.7718606180144643e-05, "loss": 0.6109, "step": 8057, "task_loss": 1.1522022485733032 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6117545366287231, "epoch": 6.81, "learning_rate": 1.7713910021602333e-05, "loss": 0.5551, "step": 8058, "task_loss": 0.7950785160064697 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.47485682368278503, "epoch": 6.81, "learning_rate": 1.770921386306002e-05, "loss": 0.8362, "step": 8059, "task_loss": 0.6836209297180176 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6119992733001709, "epoch": 6.81, "learning_rate": 1.7704517704517705e-05, "loss": 0.7624, "step": 8060, "task_loss": 0.8337969183921814 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.9443872570991516, "epoch": 6.81, "learning_rate": 1.769982154597539e-05, "loss": 0.714, "step": 8061, "task_loss": 0.518577516078949 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.8095872402191162, "epoch": 6.81, "learning_rate": 1.769512538743308e-05, "loss": 0.6415, "step": 8062, "task_loss": 0.5041683912277222 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.8358902335166931, "epoch": 6.82, "learning_rate": 1.7690429228890768e-05, "loss": 0.7834, "step": 8063, "task_loss": 0.292248010635376 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5936229228973389, "epoch": 6.82, "learning_rate": 1.7685733070348457e-05, "loss": 0.7463, "step": 8064, "task_loss": 0.23977041244506836 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6914138197898865, "epoch": 6.82, "learning_rate": 1.7681036911806144e-05, "loss": 0.6652, "step": 8065, "task_loss": 0.2436043620109558 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5593518018722534, "epoch": 6.82, "learning_rate": 1.767634075326383e-05, "loss": 0.5839, "step": 8066, "task_loss": 0.49997425079345703 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.9429531097412109, "epoch": 6.82, "learning_rate": 1.7671644594721516e-05, "loss": 0.8617, "step": 8067, "task_loss": 1.357405185699463 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5558225512504578, "epoch": 6.82, "learning_rate": 1.7666948436179206e-05, "loss": 0.6137, "step": 8068, "task_loss": 0.5421656370162964 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.3932081162929535, "epoch": 6.82, "learning_rate": 1.7662252277636892e-05, "loss": 0.6137, "step": 8069, "task_loss": 0.5381183624267578 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6787765622138977, "epoch": 6.82, "learning_rate": 1.7657556119094582e-05, "loss": 0.6853, "step": 8070, "task_loss": 0.5147101879119873 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7284848690032959, "epoch": 6.82, "learning_rate": 1.7652859960552272e-05, "loss": 0.9313, "step": 8071, "task_loss": 1.4166628122329712 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6111432909965515, "epoch": 6.82, "learning_rate": 1.7648163802009958e-05, "loss": 0.4862, "step": 8072, "task_loss": 0.7143856287002563 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.987689197063446, "epoch": 6.82, "learning_rate": 1.7643467643467645e-05, "loss": 0.608, "step": 8073, "task_loss": 1.2131282091140747 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6877415180206299, "epoch": 6.82, "learning_rate": 1.763877148492533e-05, "loss": 0.6034, "step": 8074, "task_loss": 0.7441884875297546 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4577544033527374, "epoch": 6.83, "learning_rate": 1.763407532638302e-05, "loss": 0.8457, "step": 8075, "task_loss": 0.046392105519771576 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5881800651550293, "epoch": 6.83, "learning_rate": 1.7629379167840707e-05, "loss": 0.5519, "step": 8076, "task_loss": 0.2376038283109665 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6128990650177002, "epoch": 6.83, "learning_rate": 1.7624683009298397e-05, "loss": 0.6078, "step": 8077, "task_loss": 1.2950046062469482 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.44172385334968567, "epoch": 6.83, "learning_rate": 1.7619986850756083e-05, "loss": 0.4872, "step": 8078, "task_loss": 1.2197988033294678 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.3616688847541809, "epoch": 6.83, "learning_rate": 1.761529069221377e-05, "loss": 0.5629, "step": 8079, "task_loss": 0.511985719203949 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7056217789649963, "epoch": 6.83, "learning_rate": 1.7610594533671456e-05, "loss": 0.7298, "step": 8080, "task_loss": 1.1085784435272217 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.698265790939331, "epoch": 6.83, "learning_rate": 1.7605898375129145e-05, "loss": 0.8441, "step": 8081, "task_loss": 1.2503331899642944 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5099778175354004, "epoch": 6.83, "learning_rate": 1.7601202216586832e-05, "loss": 0.7416, "step": 8082, "task_loss": 0.18721826374530792 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.8456506729125977, "epoch": 6.83, "learning_rate": 1.759650605804452e-05, "loss": 0.8655, "step": 8083, "task_loss": 1.0812793970108032 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7680861949920654, "epoch": 6.83, "learning_rate": 1.7591809899502208e-05, "loss": 0.7015, "step": 8084, "task_loss": 0.3023846745491028 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5395148396492004, "epoch": 6.83, "learning_rate": 1.7587113740959894e-05, "loss": 0.7055, "step": 8085, "task_loss": 0.3873963952064514 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7604743242263794, "epoch": 6.83, "learning_rate": 1.7582417582417584e-05, "loss": 0.6056, "step": 8086, "task_loss": 0.40063923597335815 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.8563343286514282, "epoch": 6.84, "learning_rate": 1.757772142387527e-05, "loss": 0.8563, "step": 8087, "task_loss": 1.076812744140625 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.44253695011138916, "epoch": 6.84, "learning_rate": 1.757302526533296e-05, "loss": 0.5368, "step": 8088, "task_loss": 0.5698300004005432 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.8797450661659241, "epoch": 6.84, "learning_rate": 1.7568329106790646e-05, "loss": 0.6164, "step": 8089, "task_loss": 0.6212620139122009 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.393185019493103, "epoch": 6.84, "learning_rate": 1.7563632948248336e-05, "loss": 0.593, "step": 8090, "task_loss": 0.2525624930858612 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7706207036972046, "epoch": 6.84, "learning_rate": 1.7558936789706022e-05, "loss": 0.5679, "step": 8091, "task_loss": 0.38418513536453247 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.8188756704330444, "epoch": 6.84, "learning_rate": 1.755424063116371e-05, "loss": 0.5242, "step": 8092, "task_loss": 0.9630873799324036 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.333770751953125, "epoch": 6.84, "learning_rate": 1.7549544472621395e-05, "loss": 0.8928, "step": 8093, "task_loss": 1.2351491451263428 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6036602854728699, "epoch": 6.84, "learning_rate": 1.7544848314079085e-05, "loss": 0.613, "step": 8094, "task_loss": 0.3608337640762329 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.8603923320770264, "epoch": 6.84, "learning_rate": 1.754015215553677e-05, "loss": 0.6447, "step": 8095, "task_loss": 1.1995042562484741 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7729570269584656, "epoch": 6.84, "learning_rate": 1.753545599699446e-05, "loss": 0.7472, "step": 8096, "task_loss": 0.7506545782089233 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6201485395431519, "epoch": 6.84, "learning_rate": 1.7530759838452147e-05, "loss": 0.7093, "step": 8097, "task_loss": 1.1131349802017212 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4656873941421509, "epoch": 6.84, "learning_rate": 1.7526063679909834e-05, "loss": 0.7223, "step": 8098, "task_loss": 0.3676879405975342 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6807432174682617, "epoch": 6.85, "learning_rate": 1.752136752136752e-05, "loss": 0.7488, "step": 8099, "task_loss": 0.7314665913581848 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6171824336051941, "epoch": 6.85, "learning_rate": 1.751667136282521e-05, "loss": 0.6109, "step": 8100, "task_loss": 0.6037335991859436 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6316799521446228, "epoch": 6.85, "learning_rate": 1.7511975204282896e-05, "loss": 0.6375, "step": 8101, "task_loss": 1.289804458618164 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5462307929992676, "epoch": 6.85, "learning_rate": 1.7507279045740586e-05, "loss": 0.7078, "step": 8102, "task_loss": 0.0904335007071495 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6417200565338135, "epoch": 6.85, "learning_rate": 1.7502582887198275e-05, "loss": 0.7579, "step": 8103, "task_loss": 0.7018250823020935 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.8444219827651978, "epoch": 6.85, "learning_rate": 1.749788672865596e-05, "loss": 0.8774, "step": 8104, "task_loss": 1.3110814094543457 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5336322784423828, "epoch": 6.85, "learning_rate": 1.7493190570113648e-05, "loss": 0.6376, "step": 8105, "task_loss": 1.0098947286605835 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6110276579856873, "epoch": 6.85, "learning_rate": 1.7488494411571334e-05, "loss": 0.663, "step": 8106, "task_loss": 1.1007336378097534 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5509042739868164, "epoch": 6.85, "learning_rate": 1.7483798253029024e-05, "loss": 0.4717, "step": 8107, "task_loss": 1.2208508253097534 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.45111000537872314, "epoch": 6.85, "learning_rate": 1.747910209448671e-05, "loss": 0.7191, "step": 8108, "task_loss": 0.6024281978607178 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.8496984243392944, "epoch": 6.85, "learning_rate": 1.74744059359444e-05, "loss": 0.8678, "step": 8109, "task_loss": 0.5414817333221436 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6412103772163391, "epoch": 6.85, "learning_rate": 1.7469709777402087e-05, "loss": 0.6477, "step": 8110, "task_loss": 1.0421559810638428 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7199171185493469, "epoch": 6.86, "learning_rate": 1.7465013618859773e-05, "loss": 0.6289, "step": 8111, "task_loss": 0.5767414569854736 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.2849491238594055, "epoch": 6.86, "learning_rate": 1.746031746031746e-05, "loss": 0.5021, "step": 8112, "task_loss": 0.2874775230884552 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.9090442657470703, "epoch": 6.86, "learning_rate": 1.745562130177515e-05, "loss": 0.7602, "step": 8113, "task_loss": 1.7106091976165771 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5548120141029358, "epoch": 6.86, "learning_rate": 1.7450925143232835e-05, "loss": 0.7201, "step": 8114, "task_loss": 0.20063790678977966 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5733697414398193, "epoch": 6.86, "learning_rate": 1.7446228984690525e-05, "loss": 0.6138, "step": 8115, "task_loss": 0.4877995252609253 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.43681979179382324, "epoch": 6.86, "learning_rate": 1.744153282614821e-05, "loss": 0.4679, "step": 8116, "task_loss": 0.22855518758296967 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6914660930633545, "epoch": 6.86, "learning_rate": 1.7436836667605898e-05, "loss": 0.5971, "step": 8117, "task_loss": 0.8590465784072876 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5538546442985535, "epoch": 6.86, "learning_rate": 1.7432140509063587e-05, "loss": 0.5766, "step": 8118, "task_loss": 0.6903925538063049 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.9577622413635254, "epoch": 6.86, "learning_rate": 1.7427444350521274e-05, "loss": 0.6942, "step": 8119, "task_loss": 1.4929178953170776 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6811849474906921, "epoch": 6.86, "learning_rate": 1.7422748191978963e-05, "loss": 0.7369, "step": 8120, "task_loss": 0.5386447906494141 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6813338398933411, "epoch": 6.86, "learning_rate": 1.741805203343665e-05, "loss": 0.7828, "step": 8121, "task_loss": 1.2732460498809814 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.986494779586792, "epoch": 6.87, "learning_rate": 1.741335587489434e-05, "loss": 0.8676, "step": 8122, "task_loss": 1.2339004278182983 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6859373450279236, "epoch": 6.87, "learning_rate": 1.7408659716352026e-05, "loss": 0.6468, "step": 8123, "task_loss": 0.7815605401992798 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6443341970443726, "epoch": 6.87, "learning_rate": 1.7403963557809712e-05, "loss": 0.7532, "step": 8124, "task_loss": 1.8356502056121826 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6717321872711182, "epoch": 6.87, "learning_rate": 1.73992673992674e-05, "loss": 0.6291, "step": 8125, "task_loss": 0.9082585573196411 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.588945209980011, "epoch": 6.87, "learning_rate": 1.7394571240725088e-05, "loss": 0.8331, "step": 8126, "task_loss": 0.8817654252052307 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.1301257610321045, "epoch": 6.87, "learning_rate": 1.7389875082182775e-05, "loss": 0.8745, "step": 8127, "task_loss": 1.6246498823165894 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5139785408973694, "epoch": 6.87, "learning_rate": 1.7385178923640464e-05, "loss": 0.5089, "step": 8128, "task_loss": 0.14038360118865967 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.1417827606201172, "epoch": 6.87, "learning_rate": 1.738048276509815e-05, "loss": 0.8875, "step": 8129, "task_loss": 0.6945188641548157 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.971738338470459, "epoch": 6.87, "learning_rate": 1.7375786606555837e-05, "loss": 0.6669, "step": 8130, "task_loss": 0.3653593957424164 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.47721678018569946, "epoch": 6.87, "learning_rate": 1.7371090448013523e-05, "loss": 0.629, "step": 8131, "task_loss": 0.8040969371795654 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.18415188789367676, "epoch": 6.87, "learning_rate": 1.7366394289471213e-05, "loss": 0.6567, "step": 8132, "task_loss": 0.8115647435188293 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5667914748191833, "epoch": 6.87, "learning_rate": 1.7361698130928903e-05, "loss": 0.5564, "step": 8133, "task_loss": 0.2141646444797516 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7211438417434692, "epoch": 6.88, "learning_rate": 1.735700197238659e-05, "loss": 0.7305, "step": 8134, "task_loss": 1.0475313663482666 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7538830637931824, "epoch": 6.88, "learning_rate": 1.735230581384428e-05, "loss": 0.696, "step": 8135, "task_loss": 1.1333682537078857 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.19587980210781097, "epoch": 6.88, "learning_rate": 1.7347609655301962e-05, "loss": 0.4072, "step": 8136, "task_loss": 0.028609497472643852 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5208079814910889, "epoch": 6.88, "learning_rate": 1.734291349675965e-05, "loss": 0.5723, "step": 8137, "task_loss": 1.4317752122879028 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.39321523904800415, "epoch": 6.88, "learning_rate": 1.7338217338217338e-05, "loss": 0.6205, "step": 8138, "task_loss": 0.7644479870796204 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6411881446838379, "epoch": 6.88, "learning_rate": 1.7333521179675028e-05, "loss": 0.8302, "step": 8139, "task_loss": 0.5537649989128113 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5100871324539185, "epoch": 6.88, "learning_rate": 1.7328825021132714e-05, "loss": 0.5629, "step": 8140, "task_loss": 0.1531241089105606 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.3914220929145813, "epoch": 6.88, "learning_rate": 1.7324128862590404e-05, "loss": 0.563, "step": 8141, "task_loss": 0.10520809888839722 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.28038308024406433, "epoch": 6.88, "learning_rate": 1.731943270404809e-05, "loss": 0.5928, "step": 8142, "task_loss": 0.29231780767440796 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.925789475440979, "epoch": 6.88, "learning_rate": 1.7314736545505776e-05, "loss": 0.6737, "step": 8143, "task_loss": 0.6729716658592224 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7494056224822998, "epoch": 6.88, "learning_rate": 1.7310040386963463e-05, "loss": 0.6501, "step": 8144, "task_loss": 0.5337806344032288 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6185156106948853, "epoch": 6.88, "learning_rate": 1.7305344228421152e-05, "loss": 0.4341, "step": 8145, "task_loss": 0.7601549625396729 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6812223792076111, "epoch": 6.89, "learning_rate": 1.730064806987884e-05, "loss": 1.1008, "step": 8146, "task_loss": 0.4211384356021881 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.8308714628219604, "epoch": 6.89, "learning_rate": 1.729595191133653e-05, "loss": 0.8542, "step": 8147, "task_loss": 0.368407279253006 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6035304665565491, "epoch": 6.89, "learning_rate": 1.7291255752794215e-05, "loss": 0.7483, "step": 8148, "task_loss": 0.7557377815246582 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.37824946641921997, "epoch": 6.89, "learning_rate": 1.72865595942519e-05, "loss": 0.4731, "step": 8149, "task_loss": 0.3331942558288574 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.9033036231994629, "epoch": 6.89, "learning_rate": 1.728186343570959e-05, "loss": 0.8013, "step": 8150, "task_loss": 0.6808444261550903 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.9987159371376038, "epoch": 6.89, "learning_rate": 1.7277167277167277e-05, "loss": 0.8089, "step": 8151, "task_loss": 1.4711419343948364 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6518374681472778, "epoch": 6.89, "learning_rate": 1.7272471118624967e-05, "loss": 0.5299, "step": 8152, "task_loss": 0.41068917512893677 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.8298474550247192, "epoch": 6.89, "learning_rate": 1.7267774960082653e-05, "loss": 0.6501, "step": 8153, "task_loss": 1.5824090242385864 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7021873593330383, "epoch": 6.89, "learning_rate": 1.7263078801540343e-05, "loss": 0.6806, "step": 8154, "task_loss": 0.9465674161911011 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5204759836196899, "epoch": 6.89, "learning_rate": 1.725838264299803e-05, "loss": 0.6582, "step": 8155, "task_loss": 0.45224758982658386 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7647760510444641, "epoch": 6.89, "learning_rate": 1.7253686484455716e-05, "loss": 0.7349, "step": 8156, "task_loss": 1.0484544038772583 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7216331958770752, "epoch": 6.89, "learning_rate": 1.7248990325913402e-05, "loss": 0.8441, "step": 8157, "task_loss": 0.5260019302368164 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.762638509273529, "epoch": 6.9, "learning_rate": 1.7244294167371092e-05, "loss": 0.5855, "step": 8158, "task_loss": 0.8446897268295288 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7615695595741272, "epoch": 6.9, "learning_rate": 1.7239598008828778e-05, "loss": 0.8133, "step": 8159, "task_loss": 0.6793491840362549 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5369900465011597, "epoch": 6.9, "learning_rate": 1.7234901850286468e-05, "loss": 0.8226, "step": 8160, "task_loss": 0.5459457039833069 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5788927674293518, "epoch": 6.9, "learning_rate": 1.7230205691744154e-05, "loss": 0.7685, "step": 8161, "task_loss": 0.2308109998703003 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5146836042404175, "epoch": 6.9, "learning_rate": 1.722550953320184e-05, "loss": 0.8333, "step": 8162, "task_loss": 0.2709534168243408 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4398704469203949, "epoch": 6.9, "learning_rate": 1.722081337465953e-05, "loss": 0.8517, "step": 8163, "task_loss": 0.18044772744178772 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5392913818359375, "epoch": 6.9, "learning_rate": 1.7216117216117217e-05, "loss": 0.6371, "step": 8164, "task_loss": 0.893333911895752 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.2595422565937042, "epoch": 6.9, "learning_rate": 1.7211421057574906e-05, "loss": 0.6546, "step": 8165, "task_loss": 0.20396387577056885 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.512787938117981, "epoch": 6.9, "learning_rate": 1.7206724899032593e-05, "loss": 0.6535, "step": 8166, "task_loss": 0.6848084926605225 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.40187567472457886, "epoch": 6.9, "learning_rate": 1.7202028740490282e-05, "loss": 0.735, "step": 8167, "task_loss": 0.6131902933120728 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5052165389060974, "epoch": 6.9, "learning_rate": 1.7197332581947965e-05, "loss": 0.6917, "step": 8168, "task_loss": 0.5867551565170288 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6432033777236938, "epoch": 6.9, "learning_rate": 1.7192636423405655e-05, "loss": 0.7089, "step": 8169, "task_loss": 0.48835888504981995 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.8751969337463379, "epoch": 6.91, "learning_rate": 1.718794026486334e-05, "loss": 0.7051, "step": 8170, "task_loss": 0.9686394333839417 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7078771591186523, "epoch": 6.91, "learning_rate": 1.718324410632103e-05, "loss": 0.7648, "step": 8171, "task_loss": 1.382512092590332 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5595644116401672, "epoch": 6.91, "learning_rate": 1.7178547947778718e-05, "loss": 0.6997, "step": 8172, "task_loss": 1.0817697048187256 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6985122561454773, "epoch": 6.91, "learning_rate": 1.7173851789236407e-05, "loss": 0.6749, "step": 8173, "task_loss": 1.4615943431854248 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.0168942213058472, "epoch": 6.91, "learning_rate": 1.7169155630694094e-05, "loss": 0.9048, "step": 8174, "task_loss": 1.4677472114562988 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.3328709602355957, "epoch": 6.91, "learning_rate": 1.716445947215178e-05, "loss": 0.6923, "step": 8175, "task_loss": 0.9472116827964783 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5383782982826233, "epoch": 6.91, "learning_rate": 1.7159763313609466e-05, "loss": 0.5835, "step": 8176, "task_loss": 0.19932466745376587 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5307043790817261, "epoch": 6.91, "learning_rate": 1.7155067155067156e-05, "loss": 0.6935, "step": 8177, "task_loss": 2.285045862197876 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.44599249958992004, "epoch": 6.91, "learning_rate": 1.7150370996524842e-05, "loss": 0.6981, "step": 8178, "task_loss": 0.3017326295375824 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.3936731219291687, "epoch": 6.91, "learning_rate": 1.7145674837982532e-05, "loss": 0.559, "step": 8179, "task_loss": 0.9211771488189697 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6341065764427185, "epoch": 6.91, "learning_rate": 1.714097867944022e-05, "loss": 0.5361, "step": 8180, "task_loss": 0.5204980969429016 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.9433261156082153, "epoch": 6.91, "learning_rate": 1.7136282520897905e-05, "loss": 0.7306, "step": 8181, "task_loss": 0.9570217728614807 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.3418322503566742, "epoch": 6.92, "learning_rate": 1.7131586362355594e-05, "loss": 0.5553, "step": 8182, "task_loss": 0.7738521099090576 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5210729837417603, "epoch": 6.92, "learning_rate": 1.712689020381328e-05, "loss": 0.8185, "step": 8183, "task_loss": 1.394591212272644 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.9095636010169983, "epoch": 6.92, "learning_rate": 1.712219404527097e-05, "loss": 0.7429, "step": 8184, "task_loss": 1.4811350107192993 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.57829749584198, "epoch": 6.92, "learning_rate": 1.7117497886728657e-05, "loss": 0.5109, "step": 8185, "task_loss": 0.3243939280509949 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6516492962837219, "epoch": 6.92, "learning_rate": 1.7112801728186347e-05, "loss": 0.6236, "step": 8186, "task_loss": 1.203939437866211 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.3501132130622864, "epoch": 6.92, "learning_rate": 1.710810556964403e-05, "loss": 0.5855, "step": 8187, "task_loss": 0.10756329447031021 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6217337846755981, "epoch": 6.92, "learning_rate": 1.710340941110172e-05, "loss": 0.5081, "step": 8188, "task_loss": 0.5090513825416565 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6538976430892944, "epoch": 6.92, "learning_rate": 1.7098713252559406e-05, "loss": 0.6761, "step": 8189, "task_loss": 0.5372185111045837 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.443137526512146, "epoch": 6.92, "learning_rate": 1.7094017094017095e-05, "loss": 0.6905, "step": 8190, "task_loss": 1.4223047494888306 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4753764867782593, "epoch": 6.92, "learning_rate": 1.7089320935474782e-05, "loss": 0.4717, "step": 8191, "task_loss": 0.6580274701118469 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.0920145511627197, "epoch": 6.92, "learning_rate": 1.708462477693247e-05, "loss": 0.9592, "step": 8192, "task_loss": 0.6633914113044739 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.775753378868103, "epoch": 6.93, "learning_rate": 1.7079928618390158e-05, "loss": 0.7043, "step": 8193, "task_loss": 0.6734439730644226 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.8380321860313416, "epoch": 6.93, "learning_rate": 1.7075232459847844e-05, "loss": 0.5941, "step": 8194, "task_loss": 0.4275500774383545 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6615475416183472, "epoch": 6.93, "learning_rate": 1.7070536301305534e-05, "loss": 0.6263, "step": 8195, "task_loss": 0.315200537443161 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.85567706823349, "epoch": 6.93, "learning_rate": 1.706584014276322e-05, "loss": 0.7725, "step": 8196, "task_loss": 0.892385721206665 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.3411781787872314, "epoch": 6.93, "learning_rate": 1.706114398422091e-05, "loss": 0.8573, "step": 8197, "task_loss": 1.0106481313705444 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6430004835128784, "epoch": 6.93, "learning_rate": 1.7056447825678596e-05, "loss": 0.7649, "step": 8198, "task_loss": 0.3745318353176117 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5760970711708069, "epoch": 6.93, "learning_rate": 1.7051751667136283e-05, "loss": 0.5526, "step": 8199, "task_loss": 1.0307451486587524 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5167843103408813, "epoch": 6.93, "learning_rate": 1.704705550859397e-05, "loss": 0.6333, "step": 8200, "task_loss": 0.3765457570552826 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.35068458318710327, "epoch": 6.93, "learning_rate": 1.704235935005166e-05, "loss": 0.8303, "step": 8201, "task_loss": 0.7199140191078186 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6123879551887512, "epoch": 6.93, "learning_rate": 1.7037663191509345e-05, "loss": 0.5824, "step": 8202, "task_loss": 1.0390218496322632 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.0084372758865356, "epoch": 6.93, "learning_rate": 1.7032967032967035e-05, "loss": 0.7282, "step": 8203, "task_loss": 1.3634339570999146 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.502724826335907, "epoch": 6.93, "learning_rate": 1.702827087442472e-05, "loss": 0.5184, "step": 8204, "task_loss": 0.4759935140609741 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4825650453567505, "epoch": 6.94, "learning_rate": 1.702357471588241e-05, "loss": 0.6468, "step": 8205, "task_loss": 0.1319652497768402 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6304592490196228, "epoch": 6.94, "learning_rate": 1.7018878557340097e-05, "loss": 0.7158, "step": 8206, "task_loss": 1.5240745544433594 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5963502526283264, "epoch": 6.94, "learning_rate": 1.7014182398797783e-05, "loss": 0.5952, "step": 8207, "task_loss": 0.43074753880500793 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.30828818678855896, "epoch": 6.94, "learning_rate": 1.700948624025547e-05, "loss": 0.6051, "step": 8208, "task_loss": 0.3159300684928894 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5868615508079529, "epoch": 6.94, "learning_rate": 1.700479008171316e-05, "loss": 0.6904, "step": 8209, "task_loss": 0.8068860769271851 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7866762280464172, "epoch": 6.94, "learning_rate": 1.700009392317085e-05, "loss": 0.854, "step": 8210, "task_loss": 1.5475836992263794 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6057859659194946, "epoch": 6.94, "learning_rate": 1.6995397764628536e-05, "loss": 0.5502, "step": 8211, "task_loss": 1.2508349418640137 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.33474215865135193, "epoch": 6.94, "learning_rate": 1.6990701606086222e-05, "loss": 0.6884, "step": 8212, "task_loss": 0.46864303946495056 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.3510148227214813, "epoch": 6.94, "learning_rate": 1.6986005447543908e-05, "loss": 0.7428, "step": 8213, "task_loss": 0.5251845717430115 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.8328485488891602, "epoch": 6.94, "learning_rate": 1.6981309289001598e-05, "loss": 0.6434, "step": 8214, "task_loss": 0.4234258532524109 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.0460220575332642, "epoch": 6.94, "learning_rate": 1.6976613130459284e-05, "loss": 0.7528, "step": 8215, "task_loss": 0.9814617037773132 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.8307148814201355, "epoch": 6.94, "learning_rate": 1.6971916971916974e-05, "loss": 0.7871, "step": 8216, "task_loss": 0.4819900393486023 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6192447543144226, "epoch": 6.95, "learning_rate": 1.696722081337466e-05, "loss": 0.6727, "step": 8217, "task_loss": 0.8150542974472046 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5546470284461975, "epoch": 6.95, "learning_rate": 1.696252465483235e-05, "loss": 0.6425, "step": 8218, "task_loss": 1.4672889709472656 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7491776943206787, "epoch": 6.95, "learning_rate": 1.6957828496290033e-05, "loss": 0.6271, "step": 8219, "task_loss": 0.7813766598701477 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7578977346420288, "epoch": 6.95, "learning_rate": 1.6953132337747723e-05, "loss": 0.9538, "step": 8220, "task_loss": 0.643196165561676 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.8453570008277893, "epoch": 6.95, "learning_rate": 1.694843617920541e-05, "loss": 0.6284, "step": 8221, "task_loss": 1.1305489540100098 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7399015426635742, "epoch": 6.95, "learning_rate": 1.69437400206631e-05, "loss": 0.6772, "step": 8222, "task_loss": 0.8548632264137268 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.8026523590087891, "epoch": 6.95, "learning_rate": 1.6939043862120785e-05, "loss": 0.7572, "step": 8223, "task_loss": 0.8651464581489563 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4790987968444824, "epoch": 6.95, "learning_rate": 1.6934347703578475e-05, "loss": 0.6768, "step": 8224, "task_loss": 0.1657431274652481 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.8209432363510132, "epoch": 6.95, "learning_rate": 1.692965154503616e-05, "loss": 0.8105, "step": 8225, "task_loss": 0.30885183811187744 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6117878556251526, "epoch": 6.95, "learning_rate": 1.6924955386493848e-05, "loss": 0.7415, "step": 8226, "task_loss": 0.4352125823497772 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5691406726837158, "epoch": 6.95, "learning_rate": 1.6920259227951537e-05, "loss": 0.6472, "step": 8227, "task_loss": 0.7705469727516174 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.718815267086029, "epoch": 6.95, "learning_rate": 1.6915563069409224e-05, "loss": 0.6092, "step": 8228, "task_loss": 0.5996741056442261 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.0849266052246094, "epoch": 6.96, "learning_rate": 1.6910866910866913e-05, "loss": 0.7904, "step": 8229, "task_loss": 0.7435019016265869 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.9595440626144409, "epoch": 6.96, "learning_rate": 1.69061707523246e-05, "loss": 0.8335, "step": 8230, "task_loss": 0.5913066267967224 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7968192100524902, "epoch": 6.96, "learning_rate": 1.6901474593782286e-05, "loss": 0.6633, "step": 8231, "task_loss": 0.7108122706413269 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7491438388824463, "epoch": 6.96, "learning_rate": 1.6896778435239972e-05, "loss": 0.7418, "step": 8232, "task_loss": 1.3001651763916016 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.45360925793647766, "epoch": 6.96, "learning_rate": 1.6892082276697662e-05, "loss": 0.4893, "step": 8233, "task_loss": 0.18607290089130402 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5600759983062744, "epoch": 6.96, "learning_rate": 1.688738611815535e-05, "loss": 0.6194, "step": 8234, "task_loss": 0.6662044525146484 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.1775197982788086, "epoch": 6.96, "learning_rate": 1.6882689959613038e-05, "loss": 0.807, "step": 8235, "task_loss": 1.0509095191955566 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.37883633375167847, "epoch": 6.96, "learning_rate": 1.6877993801070725e-05, "loss": 0.5439, "step": 8236, "task_loss": 0.5414415597915649 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5961737632751465, "epoch": 6.96, "learning_rate": 1.6873297642528414e-05, "loss": 0.6374, "step": 8237, "task_loss": 1.0837223529815674 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.55787593126297, "epoch": 6.96, "learning_rate": 1.68686014839861e-05, "loss": 0.6399, "step": 8238, "task_loss": 0.5217234492301941 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5135990381240845, "epoch": 6.96, "learning_rate": 1.6863905325443787e-05, "loss": 0.6265, "step": 8239, "task_loss": 0.3302956223487854 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7488323450088501, "epoch": 6.96, "learning_rate": 1.6859209166901477e-05, "loss": 0.6831, "step": 8240, "task_loss": 0.3625866174697876 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5580921769142151, "epoch": 6.97, "learning_rate": 1.6854513008359163e-05, "loss": 0.7438, "step": 8241, "task_loss": 0.28112518787384033 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.46719154715538025, "epoch": 6.97, "learning_rate": 1.6849816849816853e-05, "loss": 0.5208, "step": 8242, "task_loss": 0.8604084253311157 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.657219409942627, "epoch": 6.97, "learning_rate": 1.684512069127454e-05, "loss": 0.7774, "step": 8243, "task_loss": 0.22282832860946655 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.2052643299102783, "epoch": 6.97, "learning_rate": 1.6840424532732225e-05, "loss": 0.8043, "step": 8244, "task_loss": 1.2041168212890625 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.913894772529602, "epoch": 6.97, "learning_rate": 1.6835728374189912e-05, "loss": 0.7204, "step": 8245, "task_loss": 1.4525535106658936 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.9503656029701233, "epoch": 6.97, "learning_rate": 1.68310322156476e-05, "loss": 0.846, "step": 8246, "task_loss": 0.8675366640090942 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.8129103183746338, "epoch": 6.97, "learning_rate": 1.6826336057105288e-05, "loss": 0.6287, "step": 8247, "task_loss": 0.5677782893180847 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.8578076362609863, "epoch": 6.97, "learning_rate": 1.6821639898562978e-05, "loss": 0.7832, "step": 8248, "task_loss": 0.9151036739349365 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5810949802398682, "epoch": 6.97, "learning_rate": 1.6816943740020664e-05, "loss": 0.6398, "step": 8249, "task_loss": 0.9931421875953674 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.1725833415985107, "epoch": 6.97, "learning_rate": 1.6812247581478354e-05, "loss": 1.0306, "step": 8250, "task_loss": 1.592797040939331 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.3865169286727905, "epoch": 6.97, "learning_rate": 1.6807551422936037e-05, "loss": 0.4941, "step": 8251, "task_loss": 0.2566170394420624 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.8158028721809387, "epoch": 6.97, "learning_rate": 1.6802855264393726e-05, "loss": 0.7687, "step": 8252, "task_loss": 0.6082451939582825 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6630833148956299, "epoch": 6.98, "learning_rate": 1.6798159105851413e-05, "loss": 0.665, "step": 8253, "task_loss": 0.18085965514183044 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6660779714584351, "epoch": 6.98, "learning_rate": 1.6793462947309102e-05, "loss": 0.6627, "step": 8254, "task_loss": 1.0900431871414185 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6251408457756042, "epoch": 6.98, "learning_rate": 1.678876678876679e-05, "loss": 0.7099, "step": 8255, "task_loss": 0.817121684551239 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5671027898788452, "epoch": 6.98, "learning_rate": 1.678407063022448e-05, "loss": 0.5794, "step": 8256, "task_loss": 0.7014181017875671 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.3188495635986328, "epoch": 6.98, "learning_rate": 1.6779374471682165e-05, "loss": 0.5816, "step": 8257, "task_loss": 0.0567346066236496 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.9631364941596985, "epoch": 6.98, "learning_rate": 1.677467831313985e-05, "loss": 0.7888, "step": 8258, "task_loss": 0.7746395468711853 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.3986247479915619, "epoch": 6.98, "learning_rate": 1.676998215459754e-05, "loss": 0.7492, "step": 8259, "task_loss": 2.021054267883301 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.0316064357757568, "epoch": 6.98, "learning_rate": 1.6765285996055227e-05, "loss": 1.127, "step": 8260, "task_loss": 1.0009039640426636 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.9992505311965942, "epoch": 6.98, "learning_rate": 1.6760589837512917e-05, "loss": 0.8928, "step": 8261, "task_loss": 0.9072684645652771 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.9369519948959351, "epoch": 6.98, "learning_rate": 1.6755893678970603e-05, "loss": 0.9967, "step": 8262, "task_loss": 1.648752212524414 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.3783581256866455, "epoch": 6.98, "learning_rate": 1.675119752042829e-05, "loss": 0.5941, "step": 8263, "task_loss": 0.586639404296875 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.725365161895752, "epoch": 6.99, "learning_rate": 1.6746501361885976e-05, "loss": 0.8363, "step": 8264, "task_loss": 0.7603028416633606 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6735140085220337, "epoch": 6.99, "learning_rate": 1.6741805203343666e-05, "loss": 0.7064, "step": 8265, "task_loss": 0.7526024580001831 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5585086345672607, "epoch": 6.99, "learning_rate": 1.6737109044801352e-05, "loss": 0.5811, "step": 8266, "task_loss": 0.31808507442474365 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.8583345413208008, "epoch": 6.99, "learning_rate": 1.6732412886259042e-05, "loss": 0.651, "step": 8267, "task_loss": 0.9813191890716553 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6888899803161621, "epoch": 6.99, "learning_rate": 1.6727716727716728e-05, "loss": 0.6286, "step": 8268, "task_loss": 0.8632363080978394 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.18807867169380188, "epoch": 6.99, "learning_rate": 1.6723020569174418e-05, "loss": 0.4509, "step": 8269, "task_loss": 0.0150699932128191 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5483200550079346, "epoch": 6.99, "learning_rate": 1.67183244106321e-05, "loss": 0.5577, "step": 8270, "task_loss": 0.9129177331924438 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.8738926649093628, "epoch": 6.99, "learning_rate": 1.671362825208979e-05, "loss": 0.7083, "step": 8271, "task_loss": 0.6949505805969238 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.988156795501709, "epoch": 6.99, "learning_rate": 1.670893209354748e-05, "loss": 0.7352, "step": 8272, "task_loss": 1.2114613056182861 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.3697923421859741, "epoch": 6.99, "learning_rate": 1.6704235935005167e-05, "loss": 0.6265, "step": 8273, "task_loss": 0.45769426226615906 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4095662534236908, "epoch": 6.99, "learning_rate": 1.6699539776462856e-05, "loss": 0.6469, "step": 8274, "task_loss": 0.5900315642356873 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7218039035797119, "epoch": 6.99, "learning_rate": 1.6694843617920543e-05, "loss": 0.7413, "step": 8275, "task_loss": 1.0810508728027344 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.8326165676116943, "epoch": 7.0, "learning_rate": 1.669014745937823e-05, "loss": 0.7588, "step": 8276, "task_loss": 1.031529426574707 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.25207871198654175, "epoch": 7.0, "learning_rate": 1.6685451300835915e-05, "loss": 0.5325, "step": 8277, "task_loss": 1.3076504468917847 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.1497375965118408, "epoch": 7.0, "learning_rate": 1.6680755142293605e-05, "loss": 0.7355, "step": 8278, "task_loss": 1.0966205596923828 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.46010109782218933, "epoch": 7.0, "learning_rate": 1.667605898375129e-05, "loss": 0.6855, "step": 8279, "task_loss": 0.6017349362373352 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5598931908607483, "epoch": 7.0, "learning_rate": 1.667136282520898e-05, "loss": 0.703, "step": 8280, "task_loss": 0.30978474020957947 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.38397496938705444, "epoch": 7.0, "learning_rate": 1.6666666666666667e-05, "loss": 0.8206, "step": 8281, "task_loss": 0.09734808653593063 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6556918621063232, "epoch": 7.0, "learning_rate": 1.6661970508124354e-05, "loss": 1.2385, "step": 8282, "task_loss": 1.4574151039123535 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4400886297225952, "epoch": 7.0, "learning_rate": 1.665727434958204e-05, "loss": 0.5112, "step": 8283, "task_loss": 0.4875570833683014 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7489319443702698, "epoch": 7.0, "learning_rate": 1.665257819103973e-05, "loss": 0.8046, "step": 8284, "task_loss": 0.8576010465621948 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7353480458259583, "epoch": 7.0, "learning_rate": 1.6647882032497416e-05, "loss": 0.9708, "step": 8285, "task_loss": 0.8593668341636658 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.44553419947624207, "epoch": 7.0, "learning_rate": 1.6643185873955106e-05, "loss": 0.5505, "step": 8286, "task_loss": 1.1241674423217773 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.8107983469963074, "epoch": 7.01, "learning_rate": 1.6638489715412796e-05, "loss": 0.6563, "step": 8287, "task_loss": 1.1499189138412476 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7141011357307434, "epoch": 7.01, "learning_rate": 1.6633793556870482e-05, "loss": 0.6378, "step": 8288, "task_loss": 0.29104092717170715 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4113994240760803, "epoch": 7.01, "learning_rate": 1.662909739832817e-05, "loss": 0.4574, "step": 8289, "task_loss": 0.6397565603256226 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4586312174797058, "epoch": 7.01, "learning_rate": 1.6624401239785855e-05, "loss": 0.6528, "step": 8290, "task_loss": 0.5020477175712585 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.1405751705169678, "epoch": 7.01, "learning_rate": 1.6619705081243544e-05, "loss": 0.8842, "step": 8291, "task_loss": 1.1339741945266724 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5416803359985352, "epoch": 7.01, "learning_rate": 1.661500892270123e-05, "loss": 0.5349, "step": 8292, "task_loss": 0.3730884790420532 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.299846351146698, "epoch": 7.01, "learning_rate": 1.661031276415892e-05, "loss": 0.5233, "step": 8293, "task_loss": 0.20945407450199127 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7444020509719849, "epoch": 7.01, "learning_rate": 1.6605616605616607e-05, "loss": 0.7975, "step": 8294, "task_loss": 1.6628270149230957 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.43929219245910645, "epoch": 7.01, "learning_rate": 1.6600920447074293e-05, "loss": 0.5296, "step": 8295, "task_loss": 0.13940246403217316 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.3861343562602997, "epoch": 7.01, "learning_rate": 1.659622428853198e-05, "loss": 0.6913, "step": 8296, "task_loss": 0.04676276072859764 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7730162739753723, "epoch": 7.01, "learning_rate": 1.659152812998967e-05, "loss": 0.6389, "step": 8297, "task_loss": 1.090760588645935 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4901772439479828, "epoch": 7.01, "learning_rate": 1.6586831971447356e-05, "loss": 0.4891, "step": 8298, "task_loss": 0.43117621541023254 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7656208276748657, "epoch": 7.02, "learning_rate": 1.6582135812905045e-05, "loss": 0.8823, "step": 8299, "task_loss": 0.6806660294532776 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.1021418571472168, "epoch": 7.02, "learning_rate": 1.657743965436273e-05, "loss": 0.7504, "step": 8300, "task_loss": 1.0124170780181885 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6733958721160889, "epoch": 7.02, "learning_rate": 1.657274349582042e-05, "loss": 0.7511, "step": 8301, "task_loss": 0.6540461182594299 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.1094497442245483, "epoch": 7.02, "learning_rate": 1.6568047337278108e-05, "loss": 0.6181, "step": 8302, "task_loss": 0.6314953565597534 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.8155375719070435, "epoch": 7.02, "learning_rate": 1.6563351178735794e-05, "loss": 0.7071, "step": 8303, "task_loss": 0.8100626468658447 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4388219714164734, "epoch": 7.02, "learning_rate": 1.6558655020193484e-05, "loss": 0.5276, "step": 8304, "task_loss": 0.7258274555206299 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.894784688949585, "epoch": 7.02, "learning_rate": 1.655395886165117e-05, "loss": 0.6891, "step": 8305, "task_loss": 1.188578486442566 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.8514228463172913, "epoch": 7.02, "learning_rate": 1.654926270310886e-05, "loss": 0.6633, "step": 8306, "task_loss": 0.9450716376304626 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6548452377319336, "epoch": 7.02, "learning_rate": 1.6544566544566546e-05, "loss": 0.7705, "step": 8307, "task_loss": 0.5335700511932373 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5354621410369873, "epoch": 7.02, "learning_rate": 1.6539870386024233e-05, "loss": 0.6857, "step": 8308, "task_loss": 0.8653998374938965 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7935572266578674, "epoch": 7.02, "learning_rate": 1.653517422748192e-05, "loss": 0.5743, "step": 8309, "task_loss": 0.7053214311599731 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.3738332986831665, "epoch": 7.02, "learning_rate": 1.653047806893961e-05, "loss": 0.4891, "step": 8310, "task_loss": 0.6152140498161316 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4086637794971466, "epoch": 7.03, "learning_rate": 1.6525781910397295e-05, "loss": 0.4943, "step": 8311, "task_loss": 0.6559181213378906 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5409867763519287, "epoch": 7.03, "learning_rate": 1.6521085751854985e-05, "loss": 0.5073, "step": 8312, "task_loss": 0.6867883205413818 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5739309787750244, "epoch": 7.03, "learning_rate": 1.651638959331267e-05, "loss": 0.6707, "step": 8313, "task_loss": 0.4145790636539459 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.47136417031288147, "epoch": 7.03, "learning_rate": 1.6511693434770357e-05, "loss": 0.5862, "step": 8314, "task_loss": 1.1957221031188965 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.2273435592651367, "epoch": 7.03, "learning_rate": 1.6506997276228044e-05, "loss": 0.7451, "step": 8315, "task_loss": 2.7351536750793457 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.9121997356414795, "epoch": 7.03, "learning_rate": 1.6502301117685733e-05, "loss": 0.7575, "step": 8316, "task_loss": 0.8681555390357971 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6077507734298706, "epoch": 7.03, "learning_rate": 1.6497604959143423e-05, "loss": 0.6655, "step": 8317, "task_loss": 0.4065805971622467 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.23441100120544434, "epoch": 7.03, "learning_rate": 1.649290880060111e-05, "loss": 0.6035, "step": 8318, "task_loss": 0.08628642559051514 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6208431720733643, "epoch": 7.03, "learning_rate": 1.64882126420588e-05, "loss": 0.6877, "step": 8319, "task_loss": 0.4308265745639801 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5482019782066345, "epoch": 7.03, "learning_rate": 1.6483516483516486e-05, "loss": 0.615, "step": 8320, "task_loss": 0.5773104429244995 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7421352863311768, "epoch": 7.03, "learning_rate": 1.6478820324974172e-05, "loss": 0.513, "step": 8321, "task_loss": 0.9875578880310059 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.8845319747924805, "epoch": 7.03, "learning_rate": 1.6474124166431858e-05, "loss": 0.6105, "step": 8322, "task_loss": 1.1386936902999878 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6981757283210754, "epoch": 7.04, "learning_rate": 1.6469428007889548e-05, "loss": 0.7267, "step": 8323, "task_loss": 1.6347992420196533 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.8518170118331909, "epoch": 7.04, "learning_rate": 1.6464731849347234e-05, "loss": 0.6639, "step": 8324, "task_loss": 0.5977916121482849 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.3985435962677002, "epoch": 7.04, "learning_rate": 1.6460035690804924e-05, "loss": 0.7791, "step": 8325, "task_loss": 0.09390806406736374 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.38015830516815186, "epoch": 7.04, "learning_rate": 1.645533953226261e-05, "loss": 0.4097, "step": 8326, "task_loss": 0.5957505106925964 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.49693095684051514, "epoch": 7.04, "learning_rate": 1.6450643373720297e-05, "loss": 0.6061, "step": 8327, "task_loss": 1.3102939128875732 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.9543126821517944, "epoch": 7.04, "learning_rate": 1.6445947215177983e-05, "loss": 0.7145, "step": 8328, "task_loss": 1.869619369506836 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4659358561038971, "epoch": 7.04, "learning_rate": 1.6441251056635673e-05, "loss": 0.4675, "step": 8329, "task_loss": 0.3125387728214264 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5740473866462708, "epoch": 7.04, "learning_rate": 1.643655489809336e-05, "loss": 0.6931, "step": 8330, "task_loss": 1.0422579050064087 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.8233962059020996, "epoch": 7.04, "learning_rate": 1.643185873955105e-05, "loss": 0.6962, "step": 8331, "task_loss": 0.5217673778533936 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5062025189399719, "epoch": 7.04, "learning_rate": 1.6427162581008735e-05, "loss": 0.5152, "step": 8332, "task_loss": 0.8515752553939819 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6172130107879639, "epoch": 7.04, "learning_rate": 1.642246642246642e-05, "loss": 0.5847, "step": 8333, "task_loss": 0.22528043389320374 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.750946581363678, "epoch": 7.04, "learning_rate": 1.641777026392411e-05, "loss": 0.6526, "step": 8334, "task_loss": 0.5244506001472473 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5777402520179749, "epoch": 7.05, "learning_rate": 1.6413074105381798e-05, "loss": 0.8268, "step": 8335, "task_loss": 0.5747224688529968 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5651198625564575, "epoch": 7.05, "learning_rate": 1.6408377946839487e-05, "loss": 0.7463, "step": 8336, "task_loss": 1.891367793083191 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7866872549057007, "epoch": 7.05, "learning_rate": 1.6403681788297174e-05, "loss": 0.6005, "step": 8337, "task_loss": 0.4510728716850281 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.44750288128852844, "epoch": 7.05, "learning_rate": 1.6398985629754863e-05, "loss": 0.5219, "step": 8338, "task_loss": 0.362091600894928 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6712720394134521, "epoch": 7.05, "learning_rate": 1.639428947121255e-05, "loss": 0.6354, "step": 8339, "task_loss": 1.7426300048828125 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4043712019920349, "epoch": 7.05, "learning_rate": 1.6389593312670236e-05, "loss": 0.5755, "step": 8340, "task_loss": 0.3911268711090088 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6254101991653442, "epoch": 7.05, "learning_rate": 1.6384897154127922e-05, "loss": 0.6563, "step": 8341, "task_loss": 0.97128826379776 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.0306541919708252, "epoch": 7.05, "learning_rate": 1.6380200995585612e-05, "loss": 0.9302, "step": 8342, "task_loss": 0.9813125133514404 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7132619619369507, "epoch": 7.05, "learning_rate": 1.63755048370433e-05, "loss": 0.5803, "step": 8343, "task_loss": 0.6856171488761902 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.414203941822052, "epoch": 7.05, "learning_rate": 1.6370808678500988e-05, "loss": 0.5955, "step": 8344, "task_loss": 0.8223376274108887 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5863357782363892, "epoch": 7.05, "learning_rate": 1.6366112519958675e-05, "loss": 0.6269, "step": 8345, "task_loss": 0.8799035549163818 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.41599979996681213, "epoch": 7.05, "learning_rate": 1.636141636141636e-05, "loss": 0.5447, "step": 8346, "task_loss": 1.122682809829712 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.776012122631073, "epoch": 7.06, "learning_rate": 1.6356720202874047e-05, "loss": 0.5992, "step": 8347, "task_loss": 0.962998628616333 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.3335683047771454, "epoch": 7.06, "learning_rate": 1.6352024044331737e-05, "loss": 0.655, "step": 8348, "task_loss": 0.3722030818462372 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.0464450120925903, "epoch": 7.06, "learning_rate": 1.6347327885789427e-05, "loss": 0.7909, "step": 8349, "task_loss": 1.3056317567825317 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.9086690545082092, "epoch": 7.06, "learning_rate": 1.6342631727247113e-05, "loss": 0.6709, "step": 8350, "task_loss": 1.1190757751464844 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5207901000976562, "epoch": 7.06, "learning_rate": 1.6337935568704803e-05, "loss": 0.6011, "step": 8351, "task_loss": 0.8883943557739258 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7488521337509155, "epoch": 7.06, "learning_rate": 1.633323941016249e-05, "loss": 0.7343, "step": 8352, "task_loss": 0.7539775371551514 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6022988557815552, "epoch": 7.06, "learning_rate": 1.6328543251620175e-05, "loss": 0.4697, "step": 8353, "task_loss": 0.47024282813072205 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.8555600643157959, "epoch": 7.06, "learning_rate": 1.6323847093077862e-05, "loss": 0.7272, "step": 8354, "task_loss": 0.8987892270088196 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7761326432228088, "epoch": 7.06, "learning_rate": 1.631915093453555e-05, "loss": 0.7227, "step": 8355, "task_loss": 0.26742130517959595 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4684299826622009, "epoch": 7.06, "learning_rate": 1.6314454775993238e-05, "loss": 0.5858, "step": 8356, "task_loss": 0.9224099516868591 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6391803026199341, "epoch": 7.06, "learning_rate": 1.6309758617450928e-05, "loss": 0.6876, "step": 8357, "task_loss": 1.1253814697265625 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5158731937408447, "epoch": 7.07, "learning_rate": 1.6305062458908614e-05, "loss": 0.5682, "step": 8358, "task_loss": 1.0767371654510498 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5337636470794678, "epoch": 7.07, "learning_rate": 1.63003663003663e-05, "loss": 0.672, "step": 8359, "task_loss": 0.8821613788604736 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.503973126411438, "epoch": 7.07, "learning_rate": 1.6295670141823987e-05, "loss": 0.464, "step": 8360, "task_loss": 0.9670208096504211 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6689848303794861, "epoch": 7.07, "learning_rate": 1.6290973983281676e-05, "loss": 0.6229, "step": 8361, "task_loss": 1.2079135179519653 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.502200722694397, "epoch": 7.07, "learning_rate": 1.6286277824739363e-05, "loss": 0.5409, "step": 8362, "task_loss": 0.16884960234165192 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5786138772964478, "epoch": 7.07, "learning_rate": 1.6281581666197052e-05, "loss": 0.6795, "step": 8363, "task_loss": 0.13145966827869415 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7672156691551208, "epoch": 7.07, "learning_rate": 1.6276885507654742e-05, "loss": 0.6615, "step": 8364, "task_loss": 0.8369729518890381 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.35716062784194946, "epoch": 7.07, "learning_rate": 1.6272189349112425e-05, "loss": 0.5238, "step": 8365, "task_loss": 0.6515402793884277 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.517238438129425, "epoch": 7.07, "learning_rate": 1.6267493190570115e-05, "loss": 0.5193, "step": 8366, "task_loss": 0.23876838386058807 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.3692840337753296, "epoch": 7.07, "learning_rate": 1.62627970320278e-05, "loss": 0.6118, "step": 8367, "task_loss": 0.14951294660568237 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.46400558948516846, "epoch": 7.07, "learning_rate": 1.625810087348549e-05, "loss": 0.5629, "step": 8368, "task_loss": 0.8510369062423706 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6930915713310242, "epoch": 7.07, "learning_rate": 1.6253404714943177e-05, "loss": 0.6677, "step": 8369, "task_loss": 0.7296593189239502 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5029653310775757, "epoch": 7.08, "learning_rate": 1.6248708556400867e-05, "loss": 0.7108, "step": 8370, "task_loss": 0.5812140107154846 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.8901102542877197, "epoch": 7.08, "learning_rate": 1.6244012397858553e-05, "loss": 0.698, "step": 8371, "task_loss": 1.337058424949646 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5888836979866028, "epoch": 7.08, "learning_rate": 1.623931623931624e-05, "loss": 0.7048, "step": 8372, "task_loss": 1.8034273386001587 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6505317687988281, "epoch": 7.08, "learning_rate": 1.6234620080773926e-05, "loss": 0.7384, "step": 8373, "task_loss": 1.353248953819275 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6936500668525696, "epoch": 7.08, "learning_rate": 1.6229923922231616e-05, "loss": 0.7582, "step": 8374, "task_loss": 0.4444423019886017 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6090441346168518, "epoch": 7.08, "learning_rate": 1.6225227763689302e-05, "loss": 0.6093, "step": 8375, "task_loss": 0.14551401138305664 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.54723060131073, "epoch": 7.08, "learning_rate": 1.6220531605146992e-05, "loss": 0.6566, "step": 8376, "task_loss": 1.1575136184692383 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5637768507003784, "epoch": 7.08, "learning_rate": 1.6215835446604678e-05, "loss": 0.6548, "step": 8377, "task_loss": 0.6428893208503723 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6288415193557739, "epoch": 7.08, "learning_rate": 1.6211139288062364e-05, "loss": 0.6357, "step": 8378, "task_loss": 0.8283109664916992 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7112703919410706, "epoch": 7.08, "learning_rate": 1.6206443129520054e-05, "loss": 0.7574, "step": 8379, "task_loss": 1.2716329097747803 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.8259702920913696, "epoch": 7.08, "learning_rate": 1.620174697097774e-05, "loss": 0.6221, "step": 8380, "task_loss": 2.2903759479522705 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6075233221054077, "epoch": 7.08, "learning_rate": 1.619705081243543e-05, "loss": 0.6167, "step": 8381, "task_loss": 1.2559176683425903 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.932050347328186, "epoch": 7.09, "learning_rate": 1.6192354653893117e-05, "loss": 0.7656, "step": 8382, "task_loss": 0.9545952081680298 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.3104938566684723, "epoch": 7.09, "learning_rate": 1.6187658495350806e-05, "loss": 0.5944, "step": 8383, "task_loss": 1.149950385093689 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7924899458885193, "epoch": 7.09, "learning_rate": 1.6182962336808493e-05, "loss": 0.5242, "step": 8384, "task_loss": 1.4406886100769043 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4501177668571472, "epoch": 7.09, "learning_rate": 1.617826617826618e-05, "loss": 0.6871, "step": 8385, "task_loss": 0.4796721041202545 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.40108585357666016, "epoch": 7.09, "learning_rate": 1.6173570019723865e-05, "loss": 0.6089, "step": 8386, "task_loss": 0.49110931158065796 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.0173066854476929, "epoch": 7.09, "learning_rate": 1.6168873861181555e-05, "loss": 0.722, "step": 8387, "task_loss": 0.6607133746147156 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.48241448402404785, "epoch": 7.09, "learning_rate": 1.616417770263924e-05, "loss": 0.6044, "step": 8388, "task_loss": 0.43951845169067383 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7578156590461731, "epoch": 7.09, "learning_rate": 1.615948154409693e-05, "loss": 0.5152, "step": 8389, "task_loss": 1.5265436172485352 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.773209810256958, "epoch": 7.09, "learning_rate": 1.6154785385554617e-05, "loss": 0.6822, "step": 8390, "task_loss": 0.5898456573486328 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.069957971572876, "epoch": 7.09, "learning_rate": 1.6150089227012304e-05, "loss": 0.7045, "step": 8391, "task_loss": 1.425801396369934 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7311309576034546, "epoch": 7.09, "learning_rate": 1.614539306846999e-05, "loss": 0.6637, "step": 8392, "task_loss": 0.9855448007583618 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6902347803115845, "epoch": 7.09, "learning_rate": 1.614069690992768e-05, "loss": 0.8205, "step": 8393, "task_loss": 0.6385382413864136 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5805254578590393, "epoch": 7.1, "learning_rate": 1.613600075138537e-05, "loss": 0.6405, "step": 8394, "task_loss": 1.0978924036026 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4242471158504486, "epoch": 7.1, "learning_rate": 1.6131304592843056e-05, "loss": 0.5221, "step": 8395, "task_loss": 0.236627995967865 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7234374284744263, "epoch": 7.1, "learning_rate": 1.6126608434300746e-05, "loss": 0.7463, "step": 8396, "task_loss": 0.532874345779419 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5922473669052124, "epoch": 7.1, "learning_rate": 1.612191227575843e-05, "loss": 0.6336, "step": 8397, "task_loss": 0.9460464119911194 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.8816364407539368, "epoch": 7.1, "learning_rate": 1.6117216117216118e-05, "loss": 0.5933, "step": 8398, "task_loss": 1.5511754751205444 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7044150233268738, "epoch": 7.1, "learning_rate": 1.6112519958673805e-05, "loss": 0.8032, "step": 8399, "task_loss": 1.2868694067001343 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7648730278015137, "epoch": 7.1, "learning_rate": 1.6107823800131494e-05, "loss": 0.8261, "step": 8400, "task_loss": 0.5242278575897217 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7344472408294678, "epoch": 7.1, "learning_rate": 1.610312764158918e-05, "loss": 0.6747, "step": 8401, "task_loss": 0.5974429249763489 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7975547909736633, "epoch": 7.1, "learning_rate": 1.609843148304687e-05, "loss": 0.672, "step": 8402, "task_loss": 1.1172325611114502 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4616832733154297, "epoch": 7.1, "learning_rate": 1.6093735324504557e-05, "loss": 0.7492, "step": 8403, "task_loss": 0.7724802494049072 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5627351999282837, "epoch": 7.1, "learning_rate": 1.6089039165962243e-05, "loss": 0.7803, "step": 8404, "task_loss": 0.26859793066978455 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5410542488098145, "epoch": 7.1, "learning_rate": 1.608434300741993e-05, "loss": 0.5934, "step": 8405, "task_loss": 0.7923789620399475 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.48258042335510254, "epoch": 7.11, "learning_rate": 1.607964684887762e-05, "loss": 0.8418, "step": 8406, "task_loss": 1.0501539707183838 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5604144334793091, "epoch": 7.11, "learning_rate": 1.6074950690335306e-05, "loss": 0.6774, "step": 8407, "task_loss": 0.9660919904708862 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.864712119102478, "epoch": 7.11, "learning_rate": 1.6070254531792995e-05, "loss": 0.7266, "step": 8408, "task_loss": 1.918562650680542 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5800137519836426, "epoch": 7.11, "learning_rate": 1.606555837325068e-05, "loss": 0.5854, "step": 8409, "task_loss": 1.0101346969604492 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.3330104649066925, "epoch": 7.11, "learning_rate": 1.6060862214708368e-05, "loss": 0.5226, "step": 8410, "task_loss": 0.3834267556667328 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6610935926437378, "epoch": 7.11, "learning_rate": 1.6056166056166058e-05, "loss": 0.6349, "step": 8411, "task_loss": 0.2818596065044403 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5351617932319641, "epoch": 7.11, "learning_rate": 1.6051469897623744e-05, "loss": 0.5172, "step": 8412, "task_loss": 0.3360154926776886 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7500707507133484, "epoch": 7.11, "learning_rate": 1.6046773739081434e-05, "loss": 0.708, "step": 8413, "task_loss": 1.231766700744629 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.8463991284370422, "epoch": 7.11, "learning_rate": 1.604207758053912e-05, "loss": 0.8555, "step": 8414, "task_loss": 0.7372391819953918 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6870501041412354, "epoch": 7.11, "learning_rate": 1.603738142199681e-05, "loss": 0.6995, "step": 8415, "task_loss": 0.45287737250328064 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.3237491548061371, "epoch": 7.11, "learning_rate": 1.6032685263454493e-05, "loss": 0.4813, "step": 8416, "task_loss": 0.664273202419281 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5925995111465454, "epoch": 7.11, "learning_rate": 1.6027989104912182e-05, "loss": 0.5193, "step": 8417, "task_loss": 0.8665277361869812 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5214895606040955, "epoch": 7.12, "learning_rate": 1.602329294636987e-05, "loss": 0.5948, "step": 8418, "task_loss": 0.4023206830024719 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6630814075469971, "epoch": 7.12, "learning_rate": 1.601859678782756e-05, "loss": 0.7933, "step": 8419, "task_loss": 0.6199644207954407 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4708345830440521, "epoch": 7.12, "learning_rate": 1.6013900629285245e-05, "loss": 0.5506, "step": 8420, "task_loss": 0.6653976440429688 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.8355787992477417, "epoch": 7.12, "learning_rate": 1.6009204470742935e-05, "loss": 0.6807, "step": 8421, "task_loss": 1.0640462636947632 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.154185175895691, "epoch": 7.12, "learning_rate": 1.600450831220062e-05, "loss": 0.7133, "step": 8422, "task_loss": 1.1090424060821533 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6231474280357361, "epoch": 7.12, "learning_rate": 1.5999812153658307e-05, "loss": 0.8241, "step": 8423, "task_loss": 0.6542881727218628 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.54659104347229, "epoch": 7.12, "learning_rate": 1.5995115995115994e-05, "loss": 0.7338, "step": 8424, "task_loss": 0.19616270065307617 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5259149074554443, "epoch": 7.12, "learning_rate": 1.5990419836573683e-05, "loss": 0.6305, "step": 8425, "task_loss": 1.3076406717300415 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.3779906630516052, "epoch": 7.12, "learning_rate": 1.5985723678031373e-05, "loss": 0.5903, "step": 8426, "task_loss": 0.4189998209476471 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.40941327810287476, "epoch": 7.12, "learning_rate": 1.598102751948906e-05, "loss": 0.5589, "step": 8427, "task_loss": 0.6184937953948975 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5693541765213013, "epoch": 7.12, "learning_rate": 1.5976331360946746e-05, "loss": 0.7528, "step": 8428, "task_loss": 0.649507462978363 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.128657579421997, "epoch": 7.13, "learning_rate": 1.5971635202404432e-05, "loss": 0.8458, "step": 8429, "task_loss": 1.6795547008514404 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6948497295379639, "epoch": 7.13, "learning_rate": 1.5966939043862122e-05, "loss": 0.6684, "step": 8430, "task_loss": 1.08279287815094 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4222497344017029, "epoch": 7.13, "learning_rate": 1.5962242885319808e-05, "loss": 0.6892, "step": 8431, "task_loss": 0.14490440487861633 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.8063434958457947, "epoch": 7.13, "learning_rate": 1.5957546726777498e-05, "loss": 0.6041, "step": 8432, "task_loss": 0.9191286563873291 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7302582263946533, "epoch": 7.13, "learning_rate": 1.5952850568235184e-05, "loss": 0.5793, "step": 8433, "task_loss": 0.8220800757408142 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.8247674703598022, "epoch": 7.13, "learning_rate": 1.5948154409692874e-05, "loss": 0.58, "step": 8434, "task_loss": 1.2602459192276 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.33323851227760315, "epoch": 7.13, "learning_rate": 1.594345825115056e-05, "loss": 0.5572, "step": 8435, "task_loss": 0.5495594143867493 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.45829302072525024, "epoch": 7.13, "learning_rate": 1.5938762092608247e-05, "loss": 0.6407, "step": 8436, "task_loss": 0.31376197934150696 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6748392581939697, "epoch": 7.13, "learning_rate": 1.5934065934065933e-05, "loss": 0.744, "step": 8437, "task_loss": 0.6346133351325989 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7275195121765137, "epoch": 7.13, "learning_rate": 1.5929369775523623e-05, "loss": 0.774, "step": 8438, "task_loss": 1.1711560487747192 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4574977159500122, "epoch": 7.13, "learning_rate": 1.592467361698131e-05, "loss": 0.4762, "step": 8439, "task_loss": 0.46302270889282227 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6328120231628418, "epoch": 7.13, "learning_rate": 1.5919977458439e-05, "loss": 0.5774, "step": 8440, "task_loss": 0.7905809879302979 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6379467844963074, "epoch": 7.14, "learning_rate": 1.5915281299896685e-05, "loss": 0.9362, "step": 8441, "task_loss": 1.0344184637069702 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7461031675338745, "epoch": 7.14, "learning_rate": 1.591058514135437e-05, "loss": 0.667, "step": 8442, "task_loss": 1.4348578453063965 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6148496270179749, "epoch": 7.14, "learning_rate": 1.590588898281206e-05, "loss": 0.764, "step": 8443, "task_loss": 0.8455683588981628 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.3376334011554718, "epoch": 7.14, "learning_rate": 1.5901192824269748e-05, "loss": 0.647, "step": 8444, "task_loss": 0.19927376508712769 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.9140491485595703, "epoch": 7.14, "learning_rate": 1.5896496665727437e-05, "loss": 0.85, "step": 8445, "task_loss": 1.9344511032104492 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.3607456088066101, "epoch": 7.14, "learning_rate": 1.5891800507185124e-05, "loss": 0.7155, "step": 8446, "task_loss": 0.8040563464164734 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.9077380299568176, "epoch": 7.14, "learning_rate": 1.5887104348642813e-05, "loss": 0.5832, "step": 8447, "task_loss": 1.0322580337524414 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7491998076438904, "epoch": 7.14, "learning_rate": 1.5882408190100496e-05, "loss": 0.6584, "step": 8448, "task_loss": 1.1641910076141357 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5760918855667114, "epoch": 7.14, "learning_rate": 1.5877712031558186e-05, "loss": 0.6219, "step": 8449, "task_loss": 0.26931309700012207 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4091125726699829, "epoch": 7.14, "learning_rate": 1.5873015873015872e-05, "loss": 0.581, "step": 8450, "task_loss": 0.8228166699409485 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6835980415344238, "epoch": 7.14, "learning_rate": 1.5868319714473562e-05, "loss": 0.6247, "step": 8451, "task_loss": 0.39903631806373596 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.681440532207489, "epoch": 7.14, "learning_rate": 1.586362355593125e-05, "loss": 0.5343, "step": 8452, "task_loss": 0.3633407652378082 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6754559874534607, "epoch": 7.15, "learning_rate": 1.5858927397388938e-05, "loss": 0.5383, "step": 8453, "task_loss": 0.3753546476364136 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4289522171020508, "epoch": 7.15, "learning_rate": 1.5854231238846624e-05, "loss": 0.5601, "step": 8454, "task_loss": 0.7807856798171997 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7678532600402832, "epoch": 7.15, "learning_rate": 1.584953508030431e-05, "loss": 0.718, "step": 8455, "task_loss": 0.7275285720825195 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4696681797504425, "epoch": 7.15, "learning_rate": 1.5844838921762e-05, "loss": 0.5792, "step": 8456, "task_loss": 0.4685693085193634 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4084080457687378, "epoch": 7.15, "learning_rate": 1.5840142763219687e-05, "loss": 0.7548, "step": 8457, "task_loss": 1.0007113218307495 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5539934635162354, "epoch": 7.15, "learning_rate": 1.5835446604677377e-05, "loss": 0.5574, "step": 8458, "task_loss": 0.423354834318161 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.8159828186035156, "epoch": 7.15, "learning_rate": 1.5830750446135063e-05, "loss": 0.6891, "step": 8459, "task_loss": 0.5290579199790955 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4651435911655426, "epoch": 7.15, "learning_rate": 1.582605428759275e-05, "loss": 0.8403, "step": 8460, "task_loss": 0.23370493948459625 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.3765687346458435, "epoch": 7.15, "learning_rate": 1.5821358129050436e-05, "loss": 0.5758, "step": 8461, "task_loss": 0.6155165433883667 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.357231855392456, "epoch": 7.15, "learning_rate": 1.5816661970508125e-05, "loss": 0.7839, "step": 8462, "task_loss": 0.6769721508026123 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.3061888217926025, "epoch": 7.15, "learning_rate": 1.581196581196581e-05, "loss": 0.8217, "step": 8463, "task_loss": 1.1904014348983765 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7457218170166016, "epoch": 7.15, "learning_rate": 1.58072696534235e-05, "loss": 0.5128, "step": 8464, "task_loss": 0.7859249114990234 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.3024817109107971, "epoch": 7.16, "learning_rate": 1.5802573494881188e-05, "loss": 0.6329, "step": 8465, "task_loss": 0.831601083278656 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.28634271025657654, "epoch": 7.16, "learning_rate": 1.5797877336338877e-05, "loss": 0.418, "step": 8466, "task_loss": 0.1140139102935791 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.39979425072669983, "epoch": 7.16, "learning_rate": 1.5793181177796564e-05, "loss": 0.4887, "step": 8467, "task_loss": 0.555949330329895 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6898396015167236, "epoch": 7.16, "learning_rate": 1.578848501925425e-05, "loss": 0.6519, "step": 8468, "task_loss": 0.49919649958610535 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.0725054740905762, "epoch": 7.16, "learning_rate": 1.5783788860711936e-05, "loss": 0.6297, "step": 8469, "task_loss": 1.0409464836120605 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.35134634375572205, "epoch": 7.16, "learning_rate": 1.5779092702169626e-05, "loss": 0.5515, "step": 8470, "task_loss": 0.1648109257221222 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.92381352186203, "epoch": 7.16, "learning_rate": 1.5774396543627313e-05, "loss": 0.7499, "step": 8471, "task_loss": 0.49265220761299133 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7071285247802734, "epoch": 7.16, "learning_rate": 1.5769700385085002e-05, "loss": 0.6543, "step": 8472, "task_loss": 0.687382698059082 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6065130829811096, "epoch": 7.16, "learning_rate": 1.576500422654269e-05, "loss": 0.6698, "step": 8473, "task_loss": 0.5867818593978882 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6493736505508423, "epoch": 7.16, "learning_rate": 1.5760308068000375e-05, "loss": 0.6903, "step": 8474, "task_loss": 1.305192232131958 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.539053738117218, "epoch": 7.16, "learning_rate": 1.5755611909458065e-05, "loss": 0.5821, "step": 8475, "task_loss": 0.9122236967086792 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5077470541000366, "epoch": 7.16, "learning_rate": 1.575091575091575e-05, "loss": 0.6549, "step": 8476, "task_loss": 0.6650866270065308 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.47042301297187805, "epoch": 7.17, "learning_rate": 1.574621959237344e-05, "loss": 0.809, "step": 8477, "task_loss": 0.9839155673980713 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6949329972267151, "epoch": 7.17, "learning_rate": 1.5741523433831127e-05, "loss": 0.8172, "step": 8478, "task_loss": 1.3325800895690918 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5441287159919739, "epoch": 7.17, "learning_rate": 1.5736827275288817e-05, "loss": 0.6818, "step": 8479, "task_loss": 0.9120951890945435 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6307831406593323, "epoch": 7.17, "learning_rate": 1.57321311167465e-05, "loss": 0.6581, "step": 8480, "task_loss": 0.6517413258552551 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.43103671073913574, "epoch": 7.17, "learning_rate": 1.572743495820419e-05, "loss": 0.5377, "step": 8481, "task_loss": 0.5408791899681091 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7880616188049316, "epoch": 7.17, "learning_rate": 1.5722738799661876e-05, "loss": 0.7546, "step": 8482, "task_loss": 1.2986208200454712 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.215588927268982, "epoch": 7.17, "learning_rate": 1.5718042641119566e-05, "loss": 0.803, "step": 8483, "task_loss": 2.5933656692504883 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.8796892166137695, "epoch": 7.17, "learning_rate": 1.5713346482577252e-05, "loss": 0.6967, "step": 8484, "task_loss": 0.24904394149780273 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.47419190406799316, "epoch": 7.17, "learning_rate": 1.570865032403494e-05, "loss": 0.7735, "step": 8485, "task_loss": 0.5119695663452148 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4397846758365631, "epoch": 7.17, "learning_rate": 1.5703954165492628e-05, "loss": 0.6287, "step": 8486, "task_loss": 0.36134228110313416 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4957908093929291, "epoch": 7.17, "learning_rate": 1.5699258006950314e-05, "loss": 0.6918, "step": 8487, "task_loss": 0.4896157383918762 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4418303370475769, "epoch": 7.17, "learning_rate": 1.5694561848408004e-05, "loss": 0.5859, "step": 8488, "task_loss": 0.5154657363891602 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6294084787368774, "epoch": 7.18, "learning_rate": 1.568986568986569e-05, "loss": 0.735, "step": 8489, "task_loss": 0.41483432054519653 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5685089230537415, "epoch": 7.18, "learning_rate": 1.568516953132338e-05, "loss": 0.6569, "step": 8490, "task_loss": 0.3258821666240692 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.0115339756011963, "epoch": 7.18, "learning_rate": 1.5680473372781066e-05, "loss": 0.7211, "step": 8491, "task_loss": 1.0324640274047852 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4696957767009735, "epoch": 7.18, "learning_rate": 1.5675777214238753e-05, "loss": 0.6303, "step": 8492, "task_loss": 0.4900047183036804 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.4366559982299805, "epoch": 7.18, "learning_rate": 1.567108105569644e-05, "loss": 0.7852, "step": 8493, "task_loss": 0.6420791745185852 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7708212733268738, "epoch": 7.18, "learning_rate": 1.566638489715413e-05, "loss": 0.6284, "step": 8494, "task_loss": 1.3581048250198364 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7083960175514221, "epoch": 7.18, "learning_rate": 1.5661688738611815e-05, "loss": 0.633, "step": 8495, "task_loss": 0.3804129362106323 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.31048887968063354, "epoch": 7.18, "learning_rate": 1.5656992580069505e-05, "loss": 0.4934, "step": 8496, "task_loss": 0.03558212146162987 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.31715330481529236, "epoch": 7.18, "learning_rate": 1.565229642152719e-05, "loss": 0.7615, "step": 8497, "task_loss": 0.09065178036689758 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6127007007598877, "epoch": 7.18, "learning_rate": 1.564760026298488e-05, "loss": 0.6427, "step": 8498, "task_loss": 0.2927553355693817 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6079705953598022, "epoch": 7.18, "learning_rate": 1.5642904104442564e-05, "loss": 0.7487, "step": 8499, "task_loss": 1.2215237617492676 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.8183196187019348, "epoch": 7.19, "learning_rate": 1.5638207945900254e-05, "loss": 0.5683, "step": 8500, "task_loss": 0.743514895439148 }, { "epoch": 7.19, "eval_accuracy": 0.8953267326732673, "eval_loss": 0.43779489398002625, "eval_runtime": 227.9826, "eval_samples_per_second": 110.754, "eval_steps_per_second": 0.868, "step": 8500 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.990864098072052, "epoch": 7.19, "learning_rate": 1.563351178735794e-05, "loss": 0.7585, "step": 8501, "task_loss": 1.5002120733261108 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6665123701095581, "epoch": 7.19, "learning_rate": 1.562881562881563e-05, "loss": 0.5778, "step": 8502, "task_loss": 0.3807893693447113 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.370889276266098, "epoch": 7.19, "learning_rate": 1.562411947027332e-05, "loss": 0.5326, "step": 8503, "task_loss": 0.12369082123041153 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6021170616149902, "epoch": 7.19, "learning_rate": 1.5619423311731006e-05, "loss": 0.7089, "step": 8504, "task_loss": 1.3640639781951904 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7281806468963623, "epoch": 7.19, "learning_rate": 1.5614727153188692e-05, "loss": 0.6435, "step": 8505, "task_loss": 0.7102740406990051 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.9422788619995117, "epoch": 7.19, "learning_rate": 1.561003099464638e-05, "loss": 0.6493, "step": 8506, "task_loss": 0.6679918766021729 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.8928893208503723, "epoch": 7.19, "learning_rate": 1.5605334836104068e-05, "loss": 0.8878, "step": 8507, "task_loss": 0.8306841254234314 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6426607370376587, "epoch": 7.19, "learning_rate": 1.5600638677561755e-05, "loss": 0.6971, "step": 8508, "task_loss": 0.1507406383752823 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6246156096458435, "epoch": 7.19, "learning_rate": 1.5595942519019444e-05, "loss": 0.6238, "step": 8509, "task_loss": 0.6361398696899414 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7205590009689331, "epoch": 7.19, "learning_rate": 1.559124636047713e-05, "loss": 0.6131, "step": 8510, "task_loss": 1.0189521312713623 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.9742196202278137, "epoch": 7.19, "learning_rate": 1.5586550201934817e-05, "loss": 0.9449, "step": 8511, "task_loss": 1.8274142742156982 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7933133840560913, "epoch": 7.2, "learning_rate": 1.5581854043392503e-05, "loss": 0.8507, "step": 8512, "task_loss": 0.9836976528167725 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7427475452423096, "epoch": 7.2, "learning_rate": 1.5577157884850193e-05, "loss": 0.6915, "step": 8513, "task_loss": 0.4305715560913086 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7178005576133728, "epoch": 7.2, "learning_rate": 1.557246172630788e-05, "loss": 0.7118, "step": 8514, "task_loss": 0.8173565864562988 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.29163259267807007, "epoch": 7.2, "learning_rate": 1.556776556776557e-05, "loss": 0.5681, "step": 8515, "task_loss": 0.6637154817581177 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6451959609985352, "epoch": 7.2, "learning_rate": 1.5563069409223255e-05, "loss": 0.6489, "step": 8516, "task_loss": 0.6102973222732544 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6916806697845459, "epoch": 7.2, "learning_rate": 1.5558373250680945e-05, "loss": 0.5728, "step": 8517, "task_loss": 0.7129629850387573 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5293452739715576, "epoch": 7.2, "learning_rate": 1.555367709213863e-05, "loss": 0.529, "step": 8518, "task_loss": 0.3213886320590973 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.3538872003555298, "epoch": 7.2, "learning_rate": 1.5548980933596318e-05, "loss": 0.484, "step": 8519, "task_loss": 0.3541630208492279 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.46898022294044495, "epoch": 7.2, "learning_rate": 1.5544284775054008e-05, "loss": 0.557, "step": 8520, "task_loss": 0.5379285216331482 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.711785078048706, "epoch": 7.2, "learning_rate": 1.5539588616511694e-05, "loss": 0.7633, "step": 8521, "task_loss": 0.36310264468193054 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.8124290704727173, "epoch": 7.2, "learning_rate": 1.5534892457969384e-05, "loss": 0.7877, "step": 8522, "task_loss": 0.8821300864219666 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.8072690963745117, "epoch": 7.2, "learning_rate": 1.553019629942707e-05, "loss": 0.5839, "step": 8523, "task_loss": 0.8811415433883667 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7129310369491577, "epoch": 7.21, "learning_rate": 1.5525500140884756e-05, "loss": 0.5829, "step": 8524, "task_loss": 0.7109659314155579 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7270245552062988, "epoch": 7.21, "learning_rate": 1.5520803982342443e-05, "loss": 0.5977, "step": 8525, "task_loss": 0.09737304598093033 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.8808525204658508, "epoch": 7.21, "learning_rate": 1.5516107823800132e-05, "loss": 0.6795, "step": 8526, "task_loss": 0.7340739965438843 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.0130929946899414, "epoch": 7.21, "learning_rate": 1.551141166525782e-05, "loss": 0.7507, "step": 8527, "task_loss": 0.5438169836997986 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.2938401401042938, "epoch": 7.21, "learning_rate": 1.550671550671551e-05, "loss": 0.7239, "step": 8528, "task_loss": 0.34175828099250793 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5199658274650574, "epoch": 7.21, "learning_rate": 1.5502019348173195e-05, "loss": 0.6032, "step": 8529, "task_loss": 0.10153978317975998 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6071274876594543, "epoch": 7.21, "learning_rate": 1.5497323189630885e-05, "loss": 0.7428, "step": 8530, "task_loss": 1.5141441822052002 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5149941444396973, "epoch": 7.21, "learning_rate": 1.5492627031088567e-05, "loss": 0.6882, "step": 8531, "task_loss": 0.6275885105133057 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.8136538863182068, "epoch": 7.21, "learning_rate": 1.5487930872546257e-05, "loss": 0.6001, "step": 8532, "task_loss": 0.5640076994895935 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6719343066215515, "epoch": 7.21, "learning_rate": 1.5483234714003947e-05, "loss": 0.7496, "step": 8533, "task_loss": 1.6522986888885498 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.40758007764816284, "epoch": 7.21, "learning_rate": 1.5478538555461633e-05, "loss": 0.6011, "step": 8534, "task_loss": 0.13265427947044373 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.976730227470398, "epoch": 7.21, "learning_rate": 1.5473842396919323e-05, "loss": 0.6599, "step": 8535, "task_loss": 0.9018559455871582 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6443747282028198, "epoch": 7.22, "learning_rate": 1.546914623837701e-05, "loss": 0.6681, "step": 8536, "task_loss": 0.5526732206344604 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.9762014746665955, "epoch": 7.22, "learning_rate": 1.5464450079834696e-05, "loss": 0.6616, "step": 8537, "task_loss": 0.34834176301956177 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4865655303001404, "epoch": 7.22, "learning_rate": 1.5459753921292382e-05, "loss": 0.6129, "step": 8538, "task_loss": 0.5381414890289307 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.49180179834365845, "epoch": 7.22, "learning_rate": 1.5455057762750072e-05, "loss": 0.6005, "step": 8539, "task_loss": 1.3672940731048584 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7183713912963867, "epoch": 7.22, "learning_rate": 1.5450361604207758e-05, "loss": 0.5527, "step": 8540, "task_loss": 0.7811440229415894 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5699954032897949, "epoch": 7.22, "learning_rate": 1.5445665445665448e-05, "loss": 0.6313, "step": 8541, "task_loss": 0.3669542968273163 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6848151683807373, "epoch": 7.22, "learning_rate": 1.5440969287123134e-05, "loss": 0.6532, "step": 8542, "task_loss": 1.2367814779281616 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.8387200236320496, "epoch": 7.22, "learning_rate": 1.543627312858082e-05, "loss": 0.7712, "step": 8543, "task_loss": 0.5017297267913818 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7719025611877441, "epoch": 7.22, "learning_rate": 1.5431576970038507e-05, "loss": 0.6654, "step": 8544, "task_loss": 0.5082730054855347 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4586191475391388, "epoch": 7.22, "learning_rate": 1.5426880811496197e-05, "loss": 0.5572, "step": 8545, "task_loss": 0.37065306305885315 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7843990325927734, "epoch": 7.22, "learning_rate": 1.5422184652953883e-05, "loss": 0.7177, "step": 8546, "task_loss": 1.9631074666976929 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6040240526199341, "epoch": 7.22, "learning_rate": 1.5417488494411573e-05, "loss": 0.6309, "step": 8547, "task_loss": 0.6570003032684326 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5048094987869263, "epoch": 7.23, "learning_rate": 1.541279233586926e-05, "loss": 0.5276, "step": 8548, "task_loss": 0.6570563912391663 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7801402807235718, "epoch": 7.23, "learning_rate": 1.540809617732695e-05, "loss": 0.6239, "step": 8549, "task_loss": 0.6335853934288025 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5466333627700806, "epoch": 7.23, "learning_rate": 1.5403400018784635e-05, "loss": 0.5655, "step": 8550, "task_loss": 0.58831787109375 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7508009076118469, "epoch": 7.23, "learning_rate": 1.539870386024232e-05, "loss": 0.7145, "step": 8551, "task_loss": 0.9097232222557068 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.8376821875572205, "epoch": 7.23, "learning_rate": 1.539400770170001e-05, "loss": 0.7567, "step": 8552, "task_loss": 1.0124176740646362 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5884583592414856, "epoch": 7.23, "learning_rate": 1.5389311543157697e-05, "loss": 0.6158, "step": 8553, "task_loss": 0.4630068838596344 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5653434991836548, "epoch": 7.23, "learning_rate": 1.5384615384615387e-05, "loss": 0.5671, "step": 8554, "task_loss": 0.3201290965080261 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6270184516906738, "epoch": 7.23, "learning_rate": 1.5379919226073074e-05, "loss": 0.686, "step": 8555, "task_loss": 0.6143503189086914 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4465900659561157, "epoch": 7.23, "learning_rate": 1.537522306753076e-05, "loss": 0.6051, "step": 8556, "task_loss": 0.6222668290138245 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4419277012348175, "epoch": 7.23, "learning_rate": 1.5370526908988446e-05, "loss": 0.6918, "step": 8557, "task_loss": 1.2409132719039917 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4639429748058319, "epoch": 7.23, "learning_rate": 1.5365830750446136e-05, "loss": 0.5318, "step": 8558, "task_loss": 0.3866361677646637 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.45016348361968994, "epoch": 7.23, "learning_rate": 1.5361134591903822e-05, "loss": 0.5282, "step": 8559, "task_loss": 0.9374175667762756 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5793968439102173, "epoch": 7.24, "learning_rate": 1.5356438433361512e-05, "loss": 0.6481, "step": 8560, "task_loss": 0.47957471013069153 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4076997935771942, "epoch": 7.24, "learning_rate": 1.53517422748192e-05, "loss": 0.7701, "step": 8561, "task_loss": 0.902180552482605 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7959674596786499, "epoch": 7.24, "learning_rate": 1.5347046116276888e-05, "loss": 0.8035, "step": 8562, "task_loss": 0.559281051158905 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.33718767762184143, "epoch": 7.24, "learning_rate": 1.534234995773457e-05, "loss": 0.5085, "step": 8563, "task_loss": 0.1344453990459442 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7191209197044373, "epoch": 7.24, "learning_rate": 1.533765379919226e-05, "loss": 0.7322, "step": 8564, "task_loss": 1.3293659687042236 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7134954929351807, "epoch": 7.24, "learning_rate": 1.533295764064995e-05, "loss": 0.759, "step": 8565, "task_loss": 0.8483378887176514 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7420100569725037, "epoch": 7.24, "learning_rate": 1.5328261482107637e-05, "loss": 0.7333, "step": 8566, "task_loss": 0.6308411955833435 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.35758548974990845, "epoch": 7.24, "learning_rate": 1.5323565323565327e-05, "loss": 0.5551, "step": 8567, "task_loss": 0.4732878506183624 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4367782771587372, "epoch": 7.24, "learning_rate": 1.5318869165023013e-05, "loss": 0.575, "step": 8568, "task_loss": 0.3457781672477722 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5821860432624817, "epoch": 7.24, "learning_rate": 1.53141730064807e-05, "loss": 0.563, "step": 8569, "task_loss": 0.8253406286239624 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.0024961233139038, "epoch": 7.24, "learning_rate": 1.5309476847938386e-05, "loss": 0.6662, "step": 8570, "task_loss": 0.9171218872070312 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5770313739776611, "epoch": 7.24, "learning_rate": 1.5304780689396075e-05, "loss": 0.6166, "step": 8571, "task_loss": 0.8891733288764954 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5433895587921143, "epoch": 7.25, "learning_rate": 1.530008453085376e-05, "loss": 0.5547, "step": 8572, "task_loss": 0.22213652729988098 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4512890875339508, "epoch": 7.25, "learning_rate": 1.529538837231145e-05, "loss": 0.6617, "step": 8573, "task_loss": 1.490756630897522 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6963578462600708, "epoch": 7.25, "learning_rate": 1.5290692213769138e-05, "loss": 0.7615, "step": 8574, "task_loss": 0.9361996650695801 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6843407154083252, "epoch": 7.25, "learning_rate": 1.5285996055226824e-05, "loss": 0.7653, "step": 8575, "task_loss": 1.0033915042877197 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.701684832572937, "epoch": 7.25, "learning_rate": 1.528129989668451e-05, "loss": 0.7899, "step": 8576, "task_loss": 0.13881148397922516 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5214868783950806, "epoch": 7.25, "learning_rate": 1.52766037381422e-05, "loss": 0.5854, "step": 8577, "task_loss": 0.21535342931747437 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.0778827667236328, "epoch": 7.25, "learning_rate": 1.5271907579599886e-05, "loss": 0.7158, "step": 8578, "task_loss": 1.9342153072357178 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.3943029046058655, "epoch": 7.25, "learning_rate": 1.5267211421057576e-05, "loss": 0.7071, "step": 8579, "task_loss": 0.5509730577468872 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.0713502168655396, "epoch": 7.25, "learning_rate": 1.5262515262515266e-05, "loss": 0.7518, "step": 8580, "task_loss": 1.2229689359664917 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5968737006187439, "epoch": 7.25, "learning_rate": 1.525781910397295e-05, "loss": 0.5689, "step": 8581, "task_loss": 0.6183202266693115 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.8938381671905518, "epoch": 7.25, "learning_rate": 1.525312294543064e-05, "loss": 0.7262, "step": 8582, "task_loss": 1.263463020324707 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4779461622238159, "epoch": 7.26, "learning_rate": 1.5248426786888325e-05, "loss": 0.6892, "step": 8583, "task_loss": 0.494126558303833 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.095337152481079, "epoch": 7.26, "learning_rate": 1.5243730628346015e-05, "loss": 0.6361, "step": 8584, "task_loss": 1.1588629484176636 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.199995756149292, "epoch": 7.26, "learning_rate": 1.5239034469803701e-05, "loss": 0.5409, "step": 8585, "task_loss": 0.25120800733566284 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7136541604995728, "epoch": 7.26, "learning_rate": 1.5234338311261389e-05, "loss": 0.6815, "step": 8586, "task_loss": 2.2232882976531982 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4857661724090576, "epoch": 7.26, "learning_rate": 1.5229642152719075e-05, "loss": 0.6437, "step": 8587, "task_loss": 0.1623697578907013 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.8200966715812683, "epoch": 7.26, "learning_rate": 1.5224945994176765e-05, "loss": 0.5532, "step": 8588, "task_loss": 0.6997692584991455 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.8869998455047607, "epoch": 7.26, "learning_rate": 1.5220249835634451e-05, "loss": 0.5281, "step": 8589, "task_loss": 1.501904845237732 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.171127200126648, "epoch": 7.26, "learning_rate": 1.521555367709214e-05, "loss": 0.7384, "step": 8590, "task_loss": 0.9104582667350769 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5572950839996338, "epoch": 7.26, "learning_rate": 1.5210857518549826e-05, "loss": 0.5954, "step": 8591, "task_loss": 0.7898914217948914 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.853752613067627, "epoch": 7.26, "learning_rate": 1.5206161360007516e-05, "loss": 0.8039, "step": 8592, "task_loss": 0.5806027054786682 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4303239583969116, "epoch": 7.26, "learning_rate": 1.52014652014652e-05, "loss": 0.5116, "step": 8593, "task_loss": 0.5097985863685608 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.8605223894119263, "epoch": 7.26, "learning_rate": 1.519676904292289e-05, "loss": 0.7475, "step": 8594, "task_loss": 1.351440668106079 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4471834599971771, "epoch": 7.27, "learning_rate": 1.5192072884380578e-05, "loss": 0.6156, "step": 8595, "task_loss": 0.4556441307067871 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5589646100997925, "epoch": 7.27, "learning_rate": 1.5187376725838264e-05, "loss": 0.6862, "step": 8596, "task_loss": 0.7318354845046997 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6227444410324097, "epoch": 7.27, "learning_rate": 1.5182680567295954e-05, "loss": 0.8409, "step": 8597, "task_loss": 0.6903026700019836 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.47638800740242004, "epoch": 7.27, "learning_rate": 1.517798440875364e-05, "loss": 0.527, "step": 8598, "task_loss": 0.16575603187084198 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6527043581008911, "epoch": 7.27, "learning_rate": 1.5173288250211328e-05, "loss": 0.6512, "step": 8599, "task_loss": 0.6359402537345886 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4439966678619385, "epoch": 7.27, "learning_rate": 1.5168592091669015e-05, "loss": 0.565, "step": 8600, "task_loss": 1.2435956001281738 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5545533895492554, "epoch": 7.27, "learning_rate": 1.5163895933126704e-05, "loss": 0.5897, "step": 8601, "task_loss": 0.3241345286369324 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.42338550090789795, "epoch": 7.27, "learning_rate": 1.515919977458439e-05, "loss": 0.5262, "step": 8602, "task_loss": 0.19649693369865417 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.1142032146453857, "epoch": 7.27, "learning_rate": 1.5154503616042079e-05, "loss": 0.7495, "step": 8603, "task_loss": 1.2082784175872803 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7056745886802673, "epoch": 7.27, "learning_rate": 1.5149807457499765e-05, "loss": 0.4778, "step": 8604, "task_loss": 0.7226399779319763 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.9025708436965942, "epoch": 7.27, "learning_rate": 1.5145111298957453e-05, "loss": 0.7743, "step": 8605, "task_loss": 1.637715220451355 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.8044679164886475, "epoch": 7.27, "learning_rate": 1.514041514041514e-05, "loss": 0.7045, "step": 8606, "task_loss": 0.9451128244400024 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.2821432948112488, "epoch": 7.28, "learning_rate": 1.513571898187283e-05, "loss": 0.515, "step": 8607, "task_loss": 0.4512132704257965 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7404207587242126, "epoch": 7.28, "learning_rate": 1.5131022823330516e-05, "loss": 0.6911, "step": 8608, "task_loss": 1.3049473762512207 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.41553324460983276, "epoch": 7.28, "learning_rate": 1.5126326664788204e-05, "loss": 0.4665, "step": 8609, "task_loss": 0.5393701791763306 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7349962592124939, "epoch": 7.28, "learning_rate": 1.5121630506245893e-05, "loss": 0.7182, "step": 8610, "task_loss": 0.6264594793319702 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.37236443161964417, "epoch": 7.28, "learning_rate": 1.511693434770358e-05, "loss": 0.5368, "step": 8611, "task_loss": 0.4163951575756073 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4009607434272766, "epoch": 7.28, "learning_rate": 1.5112238189161268e-05, "loss": 0.6087, "step": 8612, "task_loss": 0.13284240663051605 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5572596192359924, "epoch": 7.28, "learning_rate": 1.5107542030618954e-05, "loss": 0.71, "step": 8613, "task_loss": 0.7044717073440552 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5306487083435059, "epoch": 7.28, "learning_rate": 1.5102845872076644e-05, "loss": 0.5792, "step": 8614, "task_loss": 0.8213581442832947 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5952543020248413, "epoch": 7.28, "learning_rate": 1.5098149713534328e-05, "loss": 0.7204, "step": 8615, "task_loss": 0.21191759407520294 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5639598965644836, "epoch": 7.28, "learning_rate": 1.5093453554992018e-05, "loss": 0.6268, "step": 8616, "task_loss": 1.499599575996399 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.49424266815185547, "epoch": 7.28, "learning_rate": 1.5088757396449705e-05, "loss": 0.696, "step": 8617, "task_loss": 0.501703143119812 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.9524234533309937, "epoch": 7.28, "learning_rate": 1.5084061237907393e-05, "loss": 0.6934, "step": 8618, "task_loss": 0.9482629895210266 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.30029505491256714, "epoch": 7.29, "learning_rate": 1.5079365079365079e-05, "loss": 0.6299, "step": 8619, "task_loss": 0.14176981151103973 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.37973201274871826, "epoch": 7.29, "learning_rate": 1.5074668920822769e-05, "loss": 0.4765, "step": 8620, "task_loss": 1.4427130222320557 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4664275348186493, "epoch": 7.29, "learning_rate": 1.5069972762280455e-05, "loss": 0.6624, "step": 8621, "task_loss": 0.24563375115394592 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.0374733209609985, "epoch": 7.29, "learning_rate": 1.5065276603738143e-05, "loss": 0.7258, "step": 8622, "task_loss": 1.0693204402923584 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6193253397941589, "epoch": 7.29, "learning_rate": 1.506058044519583e-05, "loss": 0.7572, "step": 8623, "task_loss": 1.529467225074768 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.042726755142212, "epoch": 7.29, "learning_rate": 1.5055884286653519e-05, "loss": 0.7586, "step": 8624, "task_loss": 0.6614536643028259 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5109685659408569, "epoch": 7.29, "learning_rate": 1.5051188128111204e-05, "loss": 0.8419, "step": 8625, "task_loss": 0.14167331159114838 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.46367791295051575, "epoch": 7.29, "learning_rate": 1.5046491969568893e-05, "loss": 0.5989, "step": 8626, "task_loss": 0.38729405403137207 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.0347099304199219, "epoch": 7.29, "learning_rate": 1.5041795811026581e-05, "loss": 0.8293, "step": 8627, "task_loss": 0.9797528982162476 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.439480721950531, "epoch": 7.29, "learning_rate": 1.5037099652484268e-05, "loss": 0.5909, "step": 8628, "task_loss": 0.19808103144168854 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5047893524169922, "epoch": 7.29, "learning_rate": 1.5032403493941958e-05, "loss": 0.6835, "step": 8629, "task_loss": 1.209742546081543 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.8834391832351685, "epoch": 7.29, "learning_rate": 1.5027707335399644e-05, "loss": 0.7953, "step": 8630, "task_loss": 0.6462968587875366 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5895835757255554, "epoch": 7.3, "learning_rate": 1.5023011176857332e-05, "loss": 0.5498, "step": 8631, "task_loss": 0.33146315813064575 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.544841468334198, "epoch": 7.3, "learning_rate": 1.5018315018315018e-05, "loss": 0.5888, "step": 8632, "task_loss": 0.17890118062496185 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.3741670250892639, "epoch": 7.3, "learning_rate": 1.5013618859772708e-05, "loss": 0.4397, "step": 8633, "task_loss": 0.13691885769367218 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4873931407928467, "epoch": 7.3, "learning_rate": 1.5008922701230394e-05, "loss": 0.6421, "step": 8634, "task_loss": 0.6182990074157715 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6301355361938477, "epoch": 7.3, "learning_rate": 1.5004226542688082e-05, "loss": 0.6625, "step": 8635, "task_loss": 0.8585296273231506 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6403987407684326, "epoch": 7.3, "learning_rate": 1.4999530384145769e-05, "loss": 0.6281, "step": 8636, "task_loss": 1.122861385345459 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5298886299133301, "epoch": 7.3, "learning_rate": 1.4994834225603457e-05, "loss": 0.58, "step": 8637, "task_loss": 0.4831189215183258 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7167404890060425, "epoch": 7.3, "learning_rate": 1.4990138067061143e-05, "loss": 0.4759, "step": 8638, "task_loss": 0.7935507297515869 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.49640846252441406, "epoch": 7.3, "learning_rate": 1.4985441908518833e-05, "loss": 0.6088, "step": 8639, "task_loss": 0.11342264711856842 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6727743148803711, "epoch": 7.3, "learning_rate": 1.4980745749976519e-05, "loss": 0.8105, "step": 8640, "task_loss": 1.0446207523345947 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7895699143409729, "epoch": 7.3, "learning_rate": 1.4976049591434207e-05, "loss": 0.5556, "step": 8641, "task_loss": 0.6130087375640869 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4656570553779602, "epoch": 7.3, "learning_rate": 1.4971353432891897e-05, "loss": 0.4416, "step": 8642, "task_loss": 0.5980230569839478 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.8635374903678894, "epoch": 7.31, "learning_rate": 1.4966657274349583e-05, "loss": 0.5072, "step": 8643, "task_loss": 0.4091757535934448 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6691185832023621, "epoch": 7.31, "learning_rate": 1.4961961115807271e-05, "loss": 0.5451, "step": 8644, "task_loss": 0.31768468022346497 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7229069471359253, "epoch": 7.31, "learning_rate": 1.4957264957264958e-05, "loss": 0.6373, "step": 8645, "task_loss": 0.43700647354125977 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6575817465782166, "epoch": 7.31, "learning_rate": 1.4952568798722647e-05, "loss": 0.725, "step": 8646, "task_loss": 1.7849916219711304 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.628978967666626, "epoch": 7.31, "learning_rate": 1.4947872640180332e-05, "loss": 0.6417, "step": 8647, "task_loss": 0.8761529326438904 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.48294228315353394, "epoch": 7.31, "learning_rate": 1.4943176481638022e-05, "loss": 0.4889, "step": 8648, "task_loss": 0.41177865862846375 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6754922866821289, "epoch": 7.31, "learning_rate": 1.4938480323095708e-05, "loss": 0.6276, "step": 8649, "task_loss": 1.0458240509033203 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.48102807998657227, "epoch": 7.31, "learning_rate": 1.4933784164553396e-05, "loss": 0.5467, "step": 8650, "task_loss": 0.2876395285129547 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.0646291971206665, "epoch": 7.31, "learning_rate": 1.4929088006011082e-05, "loss": 0.9358, "step": 8651, "task_loss": 1.2112129926681519 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6319409608840942, "epoch": 7.31, "learning_rate": 1.4924391847468772e-05, "loss": 0.6308, "step": 8652, "task_loss": 0.8647340536117554 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5363558530807495, "epoch": 7.31, "learning_rate": 1.4919695688926458e-05, "loss": 0.5398, "step": 8653, "task_loss": 1.0107996463775635 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6766921281814575, "epoch": 7.32, "learning_rate": 1.4914999530384147e-05, "loss": 0.5864, "step": 8654, "task_loss": 1.168833613395691 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.8869651556015015, "epoch": 7.32, "learning_rate": 1.4910303371841833e-05, "loss": 0.6275, "step": 8655, "task_loss": 1.202113389968872 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6148958206176758, "epoch": 7.32, "learning_rate": 1.4905607213299521e-05, "loss": 0.7419, "step": 8656, "task_loss": 0.7117589712142944 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4083489775657654, "epoch": 7.32, "learning_rate": 1.490091105475721e-05, "loss": 0.6641, "step": 8657, "task_loss": 0.12528233230113983 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4983125329017639, "epoch": 7.32, "learning_rate": 1.4896214896214897e-05, "loss": 0.6684, "step": 8658, "task_loss": 0.960888683795929 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6405999660491943, "epoch": 7.32, "learning_rate": 1.4891518737672585e-05, "loss": 0.8201, "step": 8659, "task_loss": 0.6992907524108887 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.8046799898147583, "epoch": 7.32, "learning_rate": 1.4886822579130271e-05, "loss": 0.7135, "step": 8660, "task_loss": 1.0677145719528198 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4784087538719177, "epoch": 7.32, "learning_rate": 1.4882126420587961e-05, "loss": 0.556, "step": 8661, "task_loss": 0.9961147308349609 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.8396099209785461, "epoch": 7.32, "learning_rate": 1.4877430262045647e-05, "loss": 0.6074, "step": 8662, "task_loss": 0.41624683141708374 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.3524929881095886, "epoch": 7.32, "learning_rate": 1.4872734103503335e-05, "loss": 0.5099, "step": 8663, "task_loss": 0.8120133876800537 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5391747951507568, "epoch": 7.32, "learning_rate": 1.4868037944961022e-05, "loss": 0.7506, "step": 8664, "task_loss": 0.5638376474380493 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.8202834129333496, "epoch": 7.32, "learning_rate": 1.4863341786418711e-05, "loss": 0.6622, "step": 8665, "task_loss": 1.4231985807418823 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.49967795610427856, "epoch": 7.33, "learning_rate": 1.4858645627876396e-05, "loss": 0.5603, "step": 8666, "task_loss": 0.14163760840892792 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4648592174053192, "epoch": 7.33, "learning_rate": 1.4853949469334086e-05, "loss": 0.5709, "step": 8667, "task_loss": 0.1473139524459839 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7350450754165649, "epoch": 7.33, "learning_rate": 1.4849253310791772e-05, "loss": 0.8494, "step": 8668, "task_loss": 1.2249563932418823 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.3016418218612671, "epoch": 7.33, "learning_rate": 1.484455715224946e-05, "loss": 0.598, "step": 8669, "task_loss": 0.34194833040237427 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5355626344680786, "epoch": 7.33, "learning_rate": 1.4839860993707147e-05, "loss": 0.561, "step": 8670, "task_loss": 0.8661082983016968 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6150917410850525, "epoch": 7.33, "learning_rate": 1.4835164835164836e-05, "loss": 0.8662, "step": 8671, "task_loss": 1.6340181827545166 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.618216872215271, "epoch": 7.33, "learning_rate": 1.4830468676622524e-05, "loss": 0.5475, "step": 8672, "task_loss": 0.31858983635902405 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7242258787155151, "epoch": 7.33, "learning_rate": 1.482577251808021e-05, "loss": 0.6977, "step": 8673, "task_loss": 0.7841537594795227 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7661798000335693, "epoch": 7.33, "learning_rate": 1.48210763595379e-05, "loss": 0.664, "step": 8674, "task_loss": 0.6289353370666504 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.29517796635627747, "epoch": 7.33, "learning_rate": 1.4816380200995587e-05, "loss": 0.4745, "step": 8675, "task_loss": 0.857078492641449 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.825374960899353, "epoch": 7.33, "learning_rate": 1.4811684042453275e-05, "loss": 0.7758, "step": 8676, "task_loss": 1.0586249828338623 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5238656401634216, "epoch": 7.33, "learning_rate": 1.4806987883910961e-05, "loss": 0.6424, "step": 8677, "task_loss": 0.5459031462669373 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.49692875146865845, "epoch": 7.34, "learning_rate": 1.4802291725368649e-05, "loss": 0.5896, "step": 8678, "task_loss": 0.8093302249908447 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6671885251998901, "epoch": 7.34, "learning_rate": 1.4797595566826335e-05, "loss": 0.7515, "step": 8679, "task_loss": 1.4242732524871826 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.3940938711166382, "epoch": 7.34, "learning_rate": 1.4792899408284025e-05, "loss": 0.5529, "step": 8680, "task_loss": 0.3324246108531952 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5185546278953552, "epoch": 7.34, "learning_rate": 1.4788203249741712e-05, "loss": 0.7947, "step": 8681, "task_loss": 1.0122621059417725 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.582156777381897, "epoch": 7.34, "learning_rate": 1.47835070911994e-05, "loss": 0.6, "step": 8682, "task_loss": 1.037845492362976 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6790263652801514, "epoch": 7.34, "learning_rate": 1.4778810932657086e-05, "loss": 0.6542, "step": 8683, "task_loss": 0.5317809581756592 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5058995485305786, "epoch": 7.34, "learning_rate": 1.4774114774114776e-05, "loss": 0.5704, "step": 8684, "task_loss": 0.6986343264579773 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.8495147228240967, "epoch": 7.34, "learning_rate": 1.4769418615572462e-05, "loss": 0.7278, "step": 8685, "task_loss": 0.6923937797546387 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.45155662298202515, "epoch": 7.34, "learning_rate": 1.476472245703015e-05, "loss": 0.5432, "step": 8686, "task_loss": 0.6137672066688538 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.8339681625366211, "epoch": 7.34, "learning_rate": 1.476002629848784e-05, "loss": 0.81, "step": 8687, "task_loss": 0.7972264289855957 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.782361626625061, "epoch": 7.34, "learning_rate": 1.4755330139945524e-05, "loss": 0.6808, "step": 8688, "task_loss": 0.4697839617729187 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5311969518661499, "epoch": 7.34, "learning_rate": 1.4750633981403214e-05, "loss": 0.6846, "step": 8689, "task_loss": 0.4134114384651184 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.3339974880218506, "epoch": 7.35, "learning_rate": 1.47459378228609e-05, "loss": 0.7445, "step": 8690, "task_loss": 0.8545857071876526 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.3742201626300812, "epoch": 7.35, "learning_rate": 1.4741241664318589e-05, "loss": 0.6829, "step": 8691, "task_loss": 0.7112624049186707 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.37157976627349854, "epoch": 7.35, "learning_rate": 1.4736545505776275e-05, "loss": 0.6935, "step": 8692, "task_loss": 0.3239036798477173 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.507222592830658, "epoch": 7.35, "learning_rate": 1.4731849347233965e-05, "loss": 0.5901, "step": 8693, "task_loss": 0.17266567051410675 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.906891405582428, "epoch": 7.35, "learning_rate": 1.4727153188691651e-05, "loss": 0.6972, "step": 8694, "task_loss": 1.2502264976501465 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.3405589461326599, "epoch": 7.35, "learning_rate": 1.4722457030149339e-05, "loss": 0.69, "step": 8695, "task_loss": 0.4206240475177765 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5094888210296631, "epoch": 7.35, "learning_rate": 1.4717760871607025e-05, "loss": 0.6903, "step": 8696, "task_loss": 0.362775981426239 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7108370661735535, "epoch": 7.35, "learning_rate": 1.4713064713064715e-05, "loss": 0.5955, "step": 8697, "task_loss": 1.44813871383667 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4439922869205475, "epoch": 7.35, "learning_rate": 1.47083685545224e-05, "loss": 0.6319, "step": 8698, "task_loss": 0.9470319747924805 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.0167666673660278, "epoch": 7.35, "learning_rate": 1.470367239598009e-05, "loss": 0.8989, "step": 8699, "task_loss": 1.2659785747528076 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5396394729614258, "epoch": 7.35, "learning_rate": 1.4698976237437776e-05, "loss": 0.553, "step": 8700, "task_loss": 0.7688786387443542 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6694654226303101, "epoch": 7.35, "learning_rate": 1.4694280078895464e-05, "loss": 0.57, "step": 8701, "task_loss": 2.7519748210906982 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.9801220893859863, "epoch": 7.36, "learning_rate": 1.468958392035315e-05, "loss": 0.7152, "step": 8702, "task_loss": 0.6829496026039124 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5833518505096436, "epoch": 7.36, "learning_rate": 1.468488776181084e-05, "loss": 0.6452, "step": 8703, "task_loss": 0.3410288691520691 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6381517648696899, "epoch": 7.36, "learning_rate": 1.4680191603268528e-05, "loss": 0.7912, "step": 8704, "task_loss": 0.9382807612419128 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5460584163665771, "epoch": 7.36, "learning_rate": 1.4675495444726214e-05, "loss": 0.5594, "step": 8705, "task_loss": 0.9401823878288269 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5684000849723816, "epoch": 7.36, "learning_rate": 1.4670799286183904e-05, "loss": 0.6745, "step": 8706, "task_loss": 0.22732090950012207 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6048927307128906, "epoch": 7.36, "learning_rate": 1.466610312764159e-05, "loss": 0.5237, "step": 8707, "task_loss": 0.6695435047149658 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6827454566955566, "epoch": 7.36, "learning_rate": 1.4661406969099278e-05, "loss": 0.602, "step": 8708, "task_loss": 0.4847581386566162 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6723681092262268, "epoch": 7.36, "learning_rate": 1.4656710810556965e-05, "loss": 0.6536, "step": 8709, "task_loss": 0.6679102778434753 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.45276665687561035, "epoch": 7.36, "learning_rate": 1.4652014652014653e-05, "loss": 0.5876, "step": 8710, "task_loss": 0.27432307600975037 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.743088960647583, "epoch": 7.36, "learning_rate": 1.4647318493472339e-05, "loss": 0.72, "step": 8711, "task_loss": 0.42950600385665894 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.9692265391349792, "epoch": 7.36, "learning_rate": 1.4642622334930029e-05, "loss": 0.7832, "step": 8712, "task_loss": 0.7369881868362427 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6633397936820984, "epoch": 7.36, "learning_rate": 1.4637926176387715e-05, "loss": 0.6079, "step": 8713, "task_loss": 0.4508334696292877 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5879848599433899, "epoch": 7.37, "learning_rate": 1.4633230017845403e-05, "loss": 0.5735, "step": 8714, "task_loss": 0.42097777128219604 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.39843183755874634, "epoch": 7.37, "learning_rate": 1.462853385930309e-05, "loss": 0.7038, "step": 8715, "task_loss": 0.9105626940727234 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.8467691540718079, "epoch": 7.37, "learning_rate": 1.462383770076078e-05, "loss": 0.6148, "step": 8716, "task_loss": 0.4705960750579834 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7410545945167542, "epoch": 7.37, "learning_rate": 1.4619141542218464e-05, "loss": 0.6942, "step": 8717, "task_loss": 0.15698537230491638 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.0142741203308105, "epoch": 7.37, "learning_rate": 1.4614445383676154e-05, "loss": 0.7765, "step": 8718, "task_loss": 0.34348371624946594 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4841160178184509, "epoch": 7.37, "learning_rate": 1.4609749225133843e-05, "loss": 0.7112, "step": 8719, "task_loss": 0.2160560041666031 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.8912450075149536, "epoch": 7.37, "learning_rate": 1.4605053066591528e-05, "loss": 0.8233, "step": 8720, "task_loss": 0.47277340292930603 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.47758734226226807, "epoch": 7.37, "learning_rate": 1.4600356908049218e-05, "loss": 0.6314, "step": 8721, "task_loss": 0.12993839383125305 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5585634708404541, "epoch": 7.37, "learning_rate": 1.4595660749506904e-05, "loss": 0.6004, "step": 8722, "task_loss": 0.6484310030937195 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.9542001485824585, "epoch": 7.37, "learning_rate": 1.4590964590964592e-05, "loss": 0.7887, "step": 8723, "task_loss": 0.5278677344322205 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.45783936977386475, "epoch": 7.37, "learning_rate": 1.4586268432422278e-05, "loss": 0.6729, "step": 8724, "task_loss": 0.5096408128738403 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.668376088142395, "epoch": 7.38, "learning_rate": 1.4581572273879968e-05, "loss": 0.6763, "step": 8725, "task_loss": 0.4645616114139557 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.3960376977920532, "epoch": 7.38, "learning_rate": 1.4576876115337654e-05, "loss": 0.5561, "step": 8726, "task_loss": 0.8314023017883301 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.8907775282859802, "epoch": 7.38, "learning_rate": 1.4572179956795342e-05, "loss": 0.5824, "step": 8727, "task_loss": 0.41596946120262146 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5307607054710388, "epoch": 7.38, "learning_rate": 1.4567483798253029e-05, "loss": 0.5946, "step": 8728, "task_loss": 1.236758828163147 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6529446244239807, "epoch": 7.38, "learning_rate": 1.4562787639710717e-05, "loss": 0.5652, "step": 8729, "task_loss": 0.7934979796409607 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.45125722885131836, "epoch": 7.38, "learning_rate": 1.4558091481168403e-05, "loss": 0.6542, "step": 8730, "task_loss": 0.8367568850517273 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5242650508880615, "epoch": 7.38, "learning_rate": 1.4553395322626093e-05, "loss": 0.6832, "step": 8731, "task_loss": 1.2408488988876343 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5519081354141235, "epoch": 7.38, "learning_rate": 1.454869916408378e-05, "loss": 0.6061, "step": 8732, "task_loss": 0.860744833946228 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.8766235113143921, "epoch": 7.38, "learning_rate": 1.4544003005541467e-05, "loss": 0.6759, "step": 8733, "task_loss": 1.1656720638275146 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5323986411094666, "epoch": 7.38, "learning_rate": 1.4539306846999157e-05, "loss": 0.5581, "step": 8734, "task_loss": 0.4563811719417572 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7628233432769775, "epoch": 7.38, "learning_rate": 1.4534610688456843e-05, "loss": 0.6233, "step": 8735, "task_loss": 1.8154648542404175 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.29266467690467834, "epoch": 7.38, "learning_rate": 1.4529914529914531e-05, "loss": 0.7151, "step": 8736, "task_loss": 0.136705681681633 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.046168327331543, "epoch": 7.39, "learning_rate": 1.4525218371372218e-05, "loss": 0.8254, "step": 8737, "task_loss": 0.8139264583587646 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4476660490036011, "epoch": 7.39, "learning_rate": 1.4520522212829907e-05, "loss": 0.4797, "step": 8738, "task_loss": 0.4886324107646942 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.29297012090682983, "epoch": 7.39, "learning_rate": 1.4515826054287592e-05, "loss": 0.5864, "step": 8739, "task_loss": 0.16907556354999542 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.37060993909835815, "epoch": 7.39, "learning_rate": 1.4511129895745282e-05, "loss": 0.6409, "step": 8740, "task_loss": 0.5214897990226746 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5165072083473206, "epoch": 7.39, "learning_rate": 1.4506433737202968e-05, "loss": 0.6421, "step": 8741, "task_loss": 0.28513848781585693 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.8321191072463989, "epoch": 7.39, "learning_rate": 1.4501737578660656e-05, "loss": 0.5909, "step": 8742, "task_loss": 0.9241749048233032 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7901775240898132, "epoch": 7.39, "learning_rate": 1.4497041420118343e-05, "loss": 0.6769, "step": 8743, "task_loss": 1.4530508518218994 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4174685478210449, "epoch": 7.39, "learning_rate": 1.4492345261576032e-05, "loss": 0.6259, "step": 8744, "task_loss": 0.7804873585700989 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7847021818161011, "epoch": 7.39, "learning_rate": 1.4487649103033719e-05, "loss": 0.8942, "step": 8745, "task_loss": 1.1442476511001587 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6377036571502686, "epoch": 7.39, "learning_rate": 1.4482952944491407e-05, "loss": 0.6128, "step": 8746, "task_loss": 0.3207526206970215 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7172498106956482, "epoch": 7.39, "learning_rate": 1.4478256785949093e-05, "loss": 0.5288, "step": 8747, "task_loss": 0.7126907706260681 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4844219386577606, "epoch": 7.39, "learning_rate": 1.4473560627406783e-05, "loss": 0.6121, "step": 8748, "task_loss": 1.3001459836959839 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7498569488525391, "epoch": 7.4, "learning_rate": 1.446886446886447e-05, "loss": 0.712, "step": 8749, "task_loss": 0.9875186681747437 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.37392911314964294, "epoch": 7.4, "learning_rate": 1.4464168310322157e-05, "loss": 0.5098, "step": 8750, "task_loss": 0.2572353482246399 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5764129161834717, "epoch": 7.4, "learning_rate": 1.4459472151779845e-05, "loss": 0.5735, "step": 8751, "task_loss": 0.2921358644962311 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4462422728538513, "epoch": 7.4, "learning_rate": 1.4454775993237531e-05, "loss": 0.6822, "step": 8752, "task_loss": 0.2888237237930298 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6485174894332886, "epoch": 7.4, "learning_rate": 1.4450079834695221e-05, "loss": 0.6735, "step": 8753, "task_loss": 0.6915189027786255 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7523515224456787, "epoch": 7.4, "learning_rate": 1.4445383676152908e-05, "loss": 0.5511, "step": 8754, "task_loss": 1.1371971368789673 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5083110332489014, "epoch": 7.4, "learning_rate": 1.4440687517610596e-05, "loss": 0.4978, "step": 8755, "task_loss": 0.5730241537094116 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.0605354309082031, "epoch": 7.4, "learning_rate": 1.4435991359068282e-05, "loss": 0.7732, "step": 8756, "task_loss": 0.8700273036956787 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4830750823020935, "epoch": 7.4, "learning_rate": 1.4431295200525972e-05, "loss": 0.4645, "step": 8757, "task_loss": 0.5995084643363953 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.8360882997512817, "epoch": 7.4, "learning_rate": 1.4426599041983658e-05, "loss": 0.7932, "step": 8758, "task_loss": 1.0043638944625854 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.9077550172805786, "epoch": 7.4, "learning_rate": 1.4421902883441346e-05, "loss": 0.7436, "step": 8759, "task_loss": 2.477295160293579 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7847498655319214, "epoch": 7.4, "learning_rate": 1.4417206724899032e-05, "loss": 0.6052, "step": 8760, "task_loss": 1.7526910305023193 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6511313915252686, "epoch": 7.41, "learning_rate": 1.441251056635672e-05, "loss": 0.5758, "step": 8761, "task_loss": 0.6303622126579285 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7562260627746582, "epoch": 7.41, "learning_rate": 1.4407814407814407e-05, "loss": 0.6852, "step": 8762, "task_loss": 0.9909593462944031 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.8733607530593872, "epoch": 7.41, "learning_rate": 1.4403118249272096e-05, "loss": 0.6407, "step": 8763, "task_loss": 1.2774608135223389 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5715016722679138, "epoch": 7.41, "learning_rate": 1.4398422090729784e-05, "loss": 0.6216, "step": 8764, "task_loss": 0.7382771968841553 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5520987510681152, "epoch": 7.41, "learning_rate": 1.439372593218747e-05, "loss": 0.7611, "step": 8765, "task_loss": 0.5457879304885864 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6868373155593872, "epoch": 7.41, "learning_rate": 1.438902977364516e-05, "loss": 0.6991, "step": 8766, "task_loss": 1.0754362344741821 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.8684296011924744, "epoch": 7.41, "learning_rate": 1.4384333615102847e-05, "loss": 0.6504, "step": 8767, "task_loss": 0.848599374294281 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.43506577610969543, "epoch": 7.41, "learning_rate": 1.4379637456560535e-05, "loss": 0.5854, "step": 8768, "task_loss": 0.5774693489074707 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.40215206146240234, "epoch": 7.41, "learning_rate": 1.4374941298018221e-05, "loss": 0.5609, "step": 8769, "task_loss": 0.2691816985607147 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6169037818908691, "epoch": 7.41, "learning_rate": 1.4370245139475911e-05, "loss": 0.5367, "step": 8770, "task_loss": 0.3403697609901428 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4808915853500366, "epoch": 7.41, "learning_rate": 1.4365548980933596e-05, "loss": 0.573, "step": 8771, "task_loss": 0.3571156859397888 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5896939039230347, "epoch": 7.41, "learning_rate": 1.4360852822391285e-05, "loss": 0.5652, "step": 8772, "task_loss": 1.4553383588790894 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5954751968383789, "epoch": 7.42, "learning_rate": 1.4356156663848972e-05, "loss": 0.6238, "step": 8773, "task_loss": 0.45196864008903503 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6677011251449585, "epoch": 7.42, "learning_rate": 1.435146050530666e-05, "loss": 0.654, "step": 8774, "task_loss": 0.4764656722545624 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7266092300415039, "epoch": 7.42, "learning_rate": 1.4346764346764346e-05, "loss": 0.826, "step": 8775, "task_loss": 0.4710925817489624 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4168340563774109, "epoch": 7.42, "learning_rate": 1.4342068188222036e-05, "loss": 0.5403, "step": 8776, "task_loss": 0.8976253271102905 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.3759346008300781, "epoch": 7.42, "learning_rate": 1.4337372029679722e-05, "loss": 0.6317, "step": 8777, "task_loss": 0.8430954217910767 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.9215548634529114, "epoch": 7.42, "learning_rate": 1.433267587113741e-05, "loss": 0.8804, "step": 8778, "task_loss": 0.9477734565734863 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5528453588485718, "epoch": 7.42, "learning_rate": 1.4327979712595097e-05, "loss": 0.5081, "step": 8779, "task_loss": 0.2547319531440735 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7810794115066528, "epoch": 7.42, "learning_rate": 1.4323283554052786e-05, "loss": 0.6952, "step": 8780, "task_loss": 0.3269648551940918 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7016751170158386, "epoch": 7.42, "learning_rate": 1.4318587395510474e-05, "loss": 0.6036, "step": 8781, "task_loss": 0.3167632818222046 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7900704741477966, "epoch": 7.42, "learning_rate": 1.431389123696816e-05, "loss": 0.6307, "step": 8782, "task_loss": 0.7553130388259888 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.2663400173187256, "epoch": 7.42, "learning_rate": 1.4309195078425849e-05, "loss": 0.6085, "step": 8783, "task_loss": 0.23223000764846802 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.8492657542228699, "epoch": 7.42, "learning_rate": 1.4304498919883535e-05, "loss": 0.6162, "step": 8784, "task_loss": 0.6101099252700806 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7760933041572571, "epoch": 7.43, "learning_rate": 1.4299802761341225e-05, "loss": 0.7803, "step": 8785, "task_loss": 0.7223026156425476 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4555625915527344, "epoch": 7.43, "learning_rate": 1.4295106602798911e-05, "loss": 0.5628, "step": 8786, "task_loss": 0.5610942244529724 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.8137398958206177, "epoch": 7.43, "learning_rate": 1.4290410444256599e-05, "loss": 0.6833, "step": 8787, "task_loss": 0.868008553981781 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.727192223072052, "epoch": 7.43, "learning_rate": 1.4285714285714285e-05, "loss": 0.8524, "step": 8788, "task_loss": 0.36701327562332153 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5639445185661316, "epoch": 7.43, "learning_rate": 1.4281018127171975e-05, "loss": 0.5052, "step": 8789, "task_loss": 0.680091917514801 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.38779574632644653, "epoch": 7.43, "learning_rate": 1.427632196862966e-05, "loss": 0.5415, "step": 8790, "task_loss": 0.3456108570098877 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.3138313293457031, "epoch": 7.43, "learning_rate": 1.427162581008735e-05, "loss": 0.452, "step": 8791, "task_loss": 0.2778777778148651 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6917424201965332, "epoch": 7.43, "learning_rate": 1.4266929651545036e-05, "loss": 0.6132, "step": 8792, "task_loss": 0.8920177817344666 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.49301934242248535, "epoch": 7.43, "learning_rate": 1.4262233493002724e-05, "loss": 0.554, "step": 8793, "task_loss": 0.32925766706466675 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4216076135635376, "epoch": 7.43, "learning_rate": 1.425753733446041e-05, "loss": 0.6, "step": 8794, "task_loss": 0.7472627758979797 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5538419485092163, "epoch": 7.43, "learning_rate": 1.42528411759181e-05, "loss": 0.6034, "step": 8795, "task_loss": 0.8886017799377441 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5164063572883606, "epoch": 7.44, "learning_rate": 1.4248145017375788e-05, "loss": 0.7603, "step": 8796, "task_loss": 0.19771885871887207 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.9094198942184448, "epoch": 7.44, "learning_rate": 1.4243448858833474e-05, "loss": 0.6542, "step": 8797, "task_loss": 0.3641730546951294 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.47626736760139465, "epoch": 7.44, "learning_rate": 1.4238752700291164e-05, "loss": 0.7843, "step": 8798, "task_loss": 0.13868051767349243 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5356508493423462, "epoch": 7.44, "learning_rate": 1.423405654174885e-05, "loss": 0.6581, "step": 8799, "task_loss": 0.8187415599822998 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.44745469093322754, "epoch": 7.44, "learning_rate": 1.4229360383206538e-05, "loss": 0.4254, "step": 8800, "task_loss": 0.40487509965896606 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.0848885774612427, "epoch": 7.44, "learning_rate": 1.4224664224664225e-05, "loss": 1.003, "step": 8801, "task_loss": 1.5169591903686523 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6064279079437256, "epoch": 7.44, "learning_rate": 1.4219968066121915e-05, "loss": 0.8561, "step": 8802, "task_loss": 1.417007565498352 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.48177289962768555, "epoch": 7.44, "learning_rate": 1.42152719075796e-05, "loss": 0.47, "step": 8803, "task_loss": 0.5477072596549988 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.9486595988273621, "epoch": 7.44, "learning_rate": 1.4210575749037289e-05, "loss": 0.6318, "step": 8804, "task_loss": 0.5397034287452698 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.3896545469760895, "epoch": 7.44, "learning_rate": 1.4205879590494975e-05, "loss": 0.4676, "step": 8805, "task_loss": 0.38796597719192505 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.40605467557907104, "epoch": 7.44, "learning_rate": 1.4201183431952663e-05, "loss": 0.6333, "step": 8806, "task_loss": 0.7369310259819031 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.9974750280380249, "epoch": 7.44, "learning_rate": 1.419648727341035e-05, "loss": 0.6219, "step": 8807, "task_loss": 0.6610552072525024 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5816104412078857, "epoch": 7.45, "learning_rate": 1.419179111486804e-05, "loss": 0.8006, "step": 8808, "task_loss": 1.196359395980835 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7082867622375488, "epoch": 7.45, "learning_rate": 1.4187094956325726e-05, "loss": 0.5379, "step": 8809, "task_loss": 0.7322938442230225 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.41693195700645447, "epoch": 7.45, "learning_rate": 1.4182398797783414e-05, "loss": 0.7126, "step": 8810, "task_loss": 0.18205788731575012 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.518075704574585, "epoch": 7.45, "learning_rate": 1.4177702639241103e-05, "loss": 0.5328, "step": 8811, "task_loss": 0.4997875988483429 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7040442228317261, "epoch": 7.45, "learning_rate": 1.4173006480698788e-05, "loss": 0.6741, "step": 8812, "task_loss": 1.1575943231582642 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.935731053352356, "epoch": 7.45, "learning_rate": 1.4168310322156478e-05, "loss": 0.586, "step": 8813, "task_loss": 1.2900288105010986 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.9262793660163879, "epoch": 7.45, "learning_rate": 1.4163614163614164e-05, "loss": 0.7475, "step": 8814, "task_loss": 1.557800531387329 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4070414900779724, "epoch": 7.45, "learning_rate": 1.4158918005071852e-05, "loss": 0.6104, "step": 8815, "task_loss": 0.6680669188499451 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4281339943408966, "epoch": 7.45, "learning_rate": 1.4154221846529539e-05, "loss": 0.5274, "step": 8816, "task_loss": 0.9679272174835205 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5258476734161377, "epoch": 7.45, "learning_rate": 1.4149525687987228e-05, "loss": 0.5405, "step": 8817, "task_loss": 1.1053797006607056 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7854675650596619, "epoch": 7.45, "learning_rate": 1.4144829529444915e-05, "loss": 0.7146, "step": 8818, "task_loss": 1.2905930280685425 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.63397216796875, "epoch": 7.45, "learning_rate": 1.4140133370902603e-05, "loss": 0.7881, "step": 8819, "task_loss": 0.6018019318580627 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.8031166791915894, "epoch": 7.46, "learning_rate": 1.4135437212360289e-05, "loss": 0.6013, "step": 8820, "task_loss": 0.5283890962600708 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.8007916212081909, "epoch": 7.46, "learning_rate": 1.4130741053817979e-05, "loss": 0.6384, "step": 8821, "task_loss": 0.831751823425293 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.0044329166412354, "epoch": 7.46, "learning_rate": 1.4126044895275663e-05, "loss": 0.7521, "step": 8822, "task_loss": 0.4560522139072418 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.49193131923675537, "epoch": 7.46, "learning_rate": 1.4121348736733353e-05, "loss": 0.5287, "step": 8823, "task_loss": 0.3974684178829193 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6580852270126343, "epoch": 7.46, "learning_rate": 1.411665257819104e-05, "loss": 0.7038, "step": 8824, "task_loss": 1.2195870876312256 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.363609254360199, "epoch": 7.46, "learning_rate": 1.4111956419648727e-05, "loss": 0.561, "step": 8825, "task_loss": 0.9209455847740173 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.49771973490715027, "epoch": 7.46, "learning_rate": 1.4107260261106417e-05, "loss": 0.5071, "step": 8826, "task_loss": 0.5364199876785278 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.9025495648384094, "epoch": 7.46, "learning_rate": 1.4102564102564104e-05, "loss": 0.6297, "step": 8827, "task_loss": 1.0529654026031494 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4821113646030426, "epoch": 7.46, "learning_rate": 1.4097867944021792e-05, "loss": 0.6007, "step": 8828, "task_loss": 0.16412353515625 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.3451638221740723, "epoch": 7.46, "learning_rate": 1.4093171785479478e-05, "loss": 0.904, "step": 8829, "task_loss": 0.6956733465194702 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.9514122605323792, "epoch": 7.46, "learning_rate": 1.4088475626937168e-05, "loss": 0.6437, "step": 8830, "task_loss": 1.6201086044311523 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5856503844261169, "epoch": 7.46, "learning_rate": 1.4083779468394854e-05, "loss": 0.6313, "step": 8831, "task_loss": 0.4601775109767914 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.21267268061637878, "epoch": 7.47, "learning_rate": 1.4079083309852542e-05, "loss": 0.7022, "step": 8832, "task_loss": 0.14750169217586517 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.0473490953445435, "epoch": 7.47, "learning_rate": 1.4074387151310228e-05, "loss": 0.624, "step": 8833, "task_loss": 1.0580224990844727 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.9063878059387207, "epoch": 7.47, "learning_rate": 1.4069690992767916e-05, "loss": 0.7555, "step": 8834, "task_loss": 1.02269446849823 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.33772343397140503, "epoch": 7.47, "learning_rate": 1.4064994834225603e-05, "loss": 0.4743, "step": 8835, "task_loss": 0.46164241433143616 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4827035367488861, "epoch": 7.47, "learning_rate": 1.4060298675683292e-05, "loss": 0.6154, "step": 8836, "task_loss": 0.7680602669715881 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.9013213515281677, "epoch": 7.47, "learning_rate": 1.4055602517140979e-05, "loss": 0.7917, "step": 8837, "task_loss": 0.5436569452285767 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.8341455459594727, "epoch": 7.47, "learning_rate": 1.4050906358598667e-05, "loss": 0.6919, "step": 8838, "task_loss": 0.6588075757026672 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.762584924697876, "epoch": 7.47, "learning_rate": 1.4046210200056353e-05, "loss": 0.5851, "step": 8839, "task_loss": 0.8509184122085571 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5847916603088379, "epoch": 7.47, "learning_rate": 1.4041514041514043e-05, "loss": 0.4197, "step": 8840, "task_loss": 0.3632822036743164 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6044394969940186, "epoch": 7.47, "learning_rate": 1.4036817882971731e-05, "loss": 0.6412, "step": 8841, "task_loss": 1.1279752254486084 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.3632338345050812, "epoch": 7.47, "learning_rate": 1.4032121724429417e-05, "loss": 0.5429, "step": 8842, "task_loss": 1.2087377309799194 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.9052183032035828, "epoch": 7.47, "learning_rate": 1.4027425565887107e-05, "loss": 0.9335, "step": 8843, "task_loss": 1.5793147087097168 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7720504999160767, "epoch": 7.48, "learning_rate": 1.4022729407344792e-05, "loss": 0.7824, "step": 8844, "task_loss": 1.397289514541626 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6841272115707397, "epoch": 7.48, "learning_rate": 1.4018033248802481e-05, "loss": 0.7192, "step": 8845, "task_loss": 0.8133702278137207 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.36515647172927856, "epoch": 7.48, "learning_rate": 1.4013337090260168e-05, "loss": 0.7363, "step": 8846, "task_loss": 0.7815085649490356 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7764300107955933, "epoch": 7.48, "learning_rate": 1.4008640931717856e-05, "loss": 0.7638, "step": 8847, "task_loss": 1.2551336288452148 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6647482514381409, "epoch": 7.48, "learning_rate": 1.4003944773175542e-05, "loss": 0.9178, "step": 8848, "task_loss": 1.0449137687683105 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5100473761558533, "epoch": 7.48, "learning_rate": 1.3999248614633232e-05, "loss": 0.7298, "step": 8849, "task_loss": 0.48996424674987793 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6519216299057007, "epoch": 7.48, "learning_rate": 1.3994552456090918e-05, "loss": 0.5964, "step": 8850, "task_loss": 0.514051616191864 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6779094934463501, "epoch": 7.48, "learning_rate": 1.3989856297548606e-05, "loss": 0.7047, "step": 8851, "task_loss": 0.98404860496521 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4576091170310974, "epoch": 7.48, "learning_rate": 1.3985160139006292e-05, "loss": 0.5453, "step": 8852, "task_loss": 0.7895155549049377 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.8270617723464966, "epoch": 7.48, "learning_rate": 1.3980463980463982e-05, "loss": 0.6426, "step": 8853, "task_loss": 0.7324225902557373 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.28825750946998596, "epoch": 7.48, "learning_rate": 1.3975767821921667e-05, "loss": 0.5616, "step": 8854, "task_loss": 0.4130653440952301 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5886930227279663, "epoch": 7.48, "learning_rate": 1.3971071663379357e-05, "loss": 0.6386, "step": 8855, "task_loss": 0.7830871939659119 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.1322964429855347, "epoch": 7.49, "learning_rate": 1.3966375504837043e-05, "loss": 0.6003, "step": 8856, "task_loss": 1.1609469652175903 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.41545674204826355, "epoch": 7.49, "learning_rate": 1.3961679346294731e-05, "loss": 0.4415, "step": 8857, "task_loss": 0.5572225451469421 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.34669339656829834, "epoch": 7.49, "learning_rate": 1.395698318775242e-05, "loss": 0.4847, "step": 8858, "task_loss": 0.36142638325691223 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5019984245300293, "epoch": 7.49, "learning_rate": 1.3952287029210107e-05, "loss": 0.6625, "step": 8859, "task_loss": 0.243475079536438 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.39736732840538025, "epoch": 7.49, "learning_rate": 1.3947590870667795e-05, "loss": 0.6771, "step": 8860, "task_loss": 0.7308594584465027 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.51688551902771, "epoch": 7.49, "learning_rate": 1.3942894712125481e-05, "loss": 0.4559, "step": 8861, "task_loss": 0.6861639022827148 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6876169443130493, "epoch": 7.49, "learning_rate": 1.3938198553583171e-05, "loss": 0.6824, "step": 8862, "task_loss": 0.7852447628974915 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.45001864433288574, "epoch": 7.49, "learning_rate": 1.3933502395040857e-05, "loss": 0.6047, "step": 8863, "task_loss": 0.8527868986129761 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6730351448059082, "epoch": 7.49, "learning_rate": 1.3928806236498546e-05, "loss": 0.5682, "step": 8864, "task_loss": 0.2160242199897766 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.36679577827453613, "epoch": 7.49, "learning_rate": 1.3924110077956232e-05, "loss": 0.5995, "step": 8865, "task_loss": 0.11887159198522568 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5325392484664917, "epoch": 7.49, "learning_rate": 1.391941391941392e-05, "loss": 0.6127, "step": 8866, "task_loss": 0.6124284863471985 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6941558122634888, "epoch": 7.5, "learning_rate": 1.3914717760871606e-05, "loss": 0.6046, "step": 8867, "task_loss": 0.37787818908691406 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7647233009338379, "epoch": 7.5, "learning_rate": 1.3910021602329296e-05, "loss": 0.606, "step": 8868, "task_loss": 1.3090510368347168 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4305768311023712, "epoch": 7.5, "learning_rate": 1.3905325443786982e-05, "loss": 0.6691, "step": 8869, "task_loss": 0.17104634642601013 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.607215404510498, "epoch": 7.5, "learning_rate": 1.390062928524467e-05, "loss": 0.6101, "step": 8870, "task_loss": 0.8759523034095764 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.3336174488067627, "epoch": 7.5, "learning_rate": 1.3895933126702357e-05, "loss": 0.4743, "step": 8871, "task_loss": 0.15742571651935577 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4073694944381714, "epoch": 7.5, "learning_rate": 1.3891236968160046e-05, "loss": 0.4578, "step": 8872, "task_loss": 0.49473658204078674 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.44035202264785767, "epoch": 7.5, "learning_rate": 1.3886540809617734e-05, "loss": 0.6142, "step": 8873, "task_loss": 0.8415103554725647 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7873488068580627, "epoch": 7.5, "learning_rate": 1.388184465107542e-05, "loss": 0.7357, "step": 8874, "task_loss": 1.210612177848816 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.41283154487609863, "epoch": 7.5, "learning_rate": 1.387714849253311e-05, "loss": 0.5424, "step": 8875, "task_loss": 0.20983490347862244 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4437851905822754, "epoch": 7.5, "learning_rate": 1.3872452333990795e-05, "loss": 0.6135, "step": 8876, "task_loss": 0.31561535596847534 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7585561871528625, "epoch": 7.5, "learning_rate": 1.3867756175448485e-05, "loss": 0.7301, "step": 8877, "task_loss": 0.78935706615448 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.8111639022827148, "epoch": 7.5, "learning_rate": 1.3863060016906171e-05, "loss": 0.6319, "step": 8878, "task_loss": 0.288131445646286 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6545495986938477, "epoch": 7.51, "learning_rate": 1.385836385836386e-05, "loss": 0.6141, "step": 8879, "task_loss": 0.19642230868339539 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.3311038613319397, "epoch": 7.51, "learning_rate": 1.3853667699821546e-05, "loss": 0.5377, "step": 8880, "task_loss": 0.7662826776504517 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6641418933868408, "epoch": 7.51, "learning_rate": 1.3848971541279235e-05, "loss": 0.4628, "step": 8881, "task_loss": 0.8792110681533813 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6287149786949158, "epoch": 7.51, "learning_rate": 1.3844275382736922e-05, "loss": 1.0357, "step": 8882, "task_loss": 0.4099615216255188 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.8372287750244141, "epoch": 7.51, "learning_rate": 1.383957922419461e-05, "loss": 0.5975, "step": 8883, "task_loss": 1.183732509613037 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.9073964357376099, "epoch": 7.51, "learning_rate": 1.3834883065652296e-05, "loss": 0.6814, "step": 8884, "task_loss": 1.5141808986663818 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5084950923919678, "epoch": 7.51, "learning_rate": 1.3830186907109984e-05, "loss": 0.4327, "step": 8885, "task_loss": 0.7032251358032227 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.33746883273124695, "epoch": 7.51, "learning_rate": 1.382549074856767e-05, "loss": 0.7159, "step": 8886, "task_loss": 1.1070866584777832 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7314506769180298, "epoch": 7.51, "learning_rate": 1.382079459002536e-05, "loss": 0.7526, "step": 8887, "task_loss": 0.32172614336013794 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6887412071228027, "epoch": 7.51, "learning_rate": 1.3816098431483048e-05, "loss": 0.5565, "step": 8888, "task_loss": 0.9023212790489197 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.26296737790107727, "epoch": 7.51, "learning_rate": 1.3811402272940734e-05, "loss": 0.5218, "step": 8889, "task_loss": 0.22079001367092133 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7881039381027222, "epoch": 7.51, "learning_rate": 1.3806706114398424e-05, "loss": 0.5847, "step": 8890, "task_loss": 0.5720228552818298 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.3764512240886688, "epoch": 7.52, "learning_rate": 1.380200995585611e-05, "loss": 0.523, "step": 8891, "task_loss": 0.30603504180908203 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5162084102630615, "epoch": 7.52, "learning_rate": 1.3797313797313799e-05, "loss": 0.6025, "step": 8892, "task_loss": 0.29257574677467346 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6364763975143433, "epoch": 7.52, "learning_rate": 1.3792617638771485e-05, "loss": 0.6658, "step": 8893, "task_loss": 0.8498486876487732 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6339189410209656, "epoch": 7.52, "learning_rate": 1.3787921480229175e-05, "loss": 0.5087, "step": 8894, "task_loss": 0.8223717212677002 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6795960068702698, "epoch": 7.52, "learning_rate": 1.378322532168686e-05, "loss": 0.9069, "step": 8895, "task_loss": 0.7811558842658997 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.9231357574462891, "epoch": 7.52, "learning_rate": 1.3778529163144549e-05, "loss": 0.674, "step": 8896, "task_loss": 0.8680524826049805 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7075031995773315, "epoch": 7.52, "learning_rate": 1.3773833004602235e-05, "loss": 0.5444, "step": 8897, "task_loss": 0.6901678442955017 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5338830351829529, "epoch": 7.52, "learning_rate": 1.3769136846059923e-05, "loss": 0.4502, "step": 8898, "task_loss": 1.1126830577850342 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.542309045791626, "epoch": 7.52, "learning_rate": 1.376444068751761e-05, "loss": 0.6814, "step": 8899, "task_loss": 1.0683743953704834 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.43895402550697327, "epoch": 7.52, "learning_rate": 1.37597445289753e-05, "loss": 0.4483, "step": 8900, "task_loss": 0.9535865783691406 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7284560203552246, "epoch": 7.52, "learning_rate": 1.3755048370432986e-05, "loss": 0.5832, "step": 8901, "task_loss": 0.5317504405975342 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4973980486392975, "epoch": 7.52, "learning_rate": 1.3750352211890674e-05, "loss": 0.4984, "step": 8902, "task_loss": 0.7434775829315186 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5061662197113037, "epoch": 7.53, "learning_rate": 1.3745656053348364e-05, "loss": 0.5925, "step": 8903, "task_loss": 0.7438743114471436 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.25885939598083496, "epoch": 7.53, "learning_rate": 1.374095989480605e-05, "loss": 0.5416, "step": 8904, "task_loss": 0.4865226149559021 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4541357457637787, "epoch": 7.53, "learning_rate": 1.3736263736263738e-05, "loss": 0.669, "step": 8905, "task_loss": 0.3624517023563385 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4954375624656677, "epoch": 7.53, "learning_rate": 1.3731567577721424e-05, "loss": 0.6767, "step": 8906, "task_loss": 0.35997769236564636 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5830038189888, "epoch": 7.53, "learning_rate": 1.3726871419179112e-05, "loss": 0.6085, "step": 8907, "task_loss": 0.9088413119316101 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.31482070684432983, "epoch": 7.53, "learning_rate": 1.3722175260636799e-05, "loss": 0.6398, "step": 8908, "task_loss": 0.22740982472896576 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.26539477705955505, "epoch": 7.53, "learning_rate": 1.3717479102094488e-05, "loss": 0.5436, "step": 8909, "task_loss": 0.3458150327205658 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.3648606538772583, "epoch": 7.53, "learning_rate": 1.3712782943552175e-05, "loss": 0.5971, "step": 8910, "task_loss": 0.35045376420021057 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5343583226203918, "epoch": 7.53, "learning_rate": 1.3708086785009863e-05, "loss": 0.4939, "step": 8911, "task_loss": 0.8208145499229431 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.9925504326820374, "epoch": 7.53, "learning_rate": 1.3703390626467549e-05, "loss": 0.6825, "step": 8912, "task_loss": 0.9285597801208496 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.40402334928512573, "epoch": 7.53, "learning_rate": 1.3698694467925239e-05, "loss": 0.5284, "step": 8913, "task_loss": 1.4407533407211304 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5207595825195312, "epoch": 7.53, "learning_rate": 1.3693998309382925e-05, "loss": 0.7412, "step": 8914, "task_loss": 0.3618236184120178 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4540119171142578, "epoch": 7.54, "learning_rate": 1.3689302150840613e-05, "loss": 0.6699, "step": 8915, "task_loss": 1.0704293251037598 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5440312623977661, "epoch": 7.54, "learning_rate": 1.36846059922983e-05, "loss": 0.4314, "step": 8916, "task_loss": 0.6740189790725708 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7333455085754395, "epoch": 7.54, "learning_rate": 1.3679909833755988e-05, "loss": 0.6413, "step": 8917, "task_loss": 0.9622343182563782 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6980935335159302, "epoch": 7.54, "learning_rate": 1.3675213675213677e-05, "loss": 0.7288, "step": 8918, "task_loss": 0.783235490322113 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5580082535743713, "epoch": 7.54, "learning_rate": 1.3670517516671364e-05, "loss": 0.7308, "step": 8919, "task_loss": 0.7305170297622681 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.8420637249946594, "epoch": 7.54, "learning_rate": 1.3665821358129052e-05, "loss": 0.7902, "step": 8920, "task_loss": 1.3712555170059204 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6346033811569214, "epoch": 7.54, "learning_rate": 1.3661125199586738e-05, "loss": 0.9368, "step": 8921, "task_loss": 1.1724592447280884 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5805145502090454, "epoch": 7.54, "learning_rate": 1.3656429041044428e-05, "loss": 0.4025, "step": 8922, "task_loss": 0.9596258401870728 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7997813820838928, "epoch": 7.54, "learning_rate": 1.3651732882502114e-05, "loss": 0.6564, "step": 8923, "task_loss": 0.6341410279273987 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4153688848018646, "epoch": 7.54, "learning_rate": 1.3647036723959802e-05, "loss": 0.5531, "step": 8924, "task_loss": 0.4851565659046173 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.8263653516769409, "epoch": 7.54, "learning_rate": 1.3642340565417488e-05, "loss": 0.6389, "step": 8925, "task_loss": 1.1839333772659302 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6100901365280151, "epoch": 7.54, "learning_rate": 1.3637644406875178e-05, "loss": 0.5973, "step": 8926, "task_loss": 0.760794997215271 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7172326445579529, "epoch": 7.55, "learning_rate": 1.3632948248332863e-05, "loss": 0.7104, "step": 8927, "task_loss": 0.5268847942352295 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6305948495864868, "epoch": 7.55, "learning_rate": 1.3628252089790553e-05, "loss": 0.6811, "step": 8928, "task_loss": 0.8348854780197144 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6487269401550293, "epoch": 7.55, "learning_rate": 1.3623555931248239e-05, "loss": 0.6046, "step": 8929, "task_loss": 0.664027214050293 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7526897192001343, "epoch": 7.55, "learning_rate": 1.3618859772705927e-05, "loss": 0.7821, "step": 8930, "task_loss": 1.2933772802352905 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.42170771956443787, "epoch": 7.55, "learning_rate": 1.3614163614163613e-05, "loss": 0.5188, "step": 8931, "task_loss": 0.4956776797771454 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7291697263717651, "epoch": 7.55, "learning_rate": 1.3609467455621303e-05, "loss": 0.707, "step": 8932, "task_loss": 0.6128455996513367 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5117077827453613, "epoch": 7.55, "learning_rate": 1.360477129707899e-05, "loss": 0.5902, "step": 8933, "task_loss": 0.7781287431716919 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6265698671340942, "epoch": 7.55, "learning_rate": 1.3600075138536677e-05, "loss": 0.5352, "step": 8934, "task_loss": 0.6233731508255005 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7455462217330933, "epoch": 7.55, "learning_rate": 1.3595378979994367e-05, "loss": 0.5942, "step": 8935, "task_loss": 0.9626688361167908 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5240319967269897, "epoch": 7.55, "learning_rate": 1.3590682821452053e-05, "loss": 0.6312, "step": 8936, "task_loss": 0.30184900760650635 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6667303442955017, "epoch": 7.55, "learning_rate": 1.3585986662909741e-05, "loss": 0.5168, "step": 8937, "task_loss": 0.342134028673172 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.8145533800125122, "epoch": 7.56, "learning_rate": 1.3581290504367428e-05, "loss": 0.6233, "step": 8938, "task_loss": 0.7255985736846924 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5288999080657959, "epoch": 7.56, "learning_rate": 1.3576594345825116e-05, "loss": 0.7124, "step": 8939, "task_loss": 0.4955522119998932 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.49799031019210815, "epoch": 7.56, "learning_rate": 1.3571898187282802e-05, "loss": 0.6238, "step": 8940, "task_loss": 0.6523423194885254 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4156922399997711, "epoch": 7.56, "learning_rate": 1.3567202028740492e-05, "loss": 0.4967, "step": 8941, "task_loss": 0.5171871185302734 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4986758530139923, "epoch": 7.56, "learning_rate": 1.3562505870198178e-05, "loss": 0.5186, "step": 8942, "task_loss": 0.8211464285850525 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.584071159362793, "epoch": 7.56, "learning_rate": 1.3557809711655866e-05, "loss": 0.5939, "step": 8943, "task_loss": 0.7096570730209351 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7129727602005005, "epoch": 7.56, "learning_rate": 1.3553113553113553e-05, "loss": 0.505, "step": 8944, "task_loss": 1.032939076423645 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5510331988334656, "epoch": 7.56, "learning_rate": 1.3548417394571242e-05, "loss": 0.6564, "step": 8945, "task_loss": 0.568354606628418 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.0309984683990479, "epoch": 7.56, "learning_rate": 1.3543721236028927e-05, "loss": 0.6858, "step": 8946, "task_loss": 1.9083861112594604 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5534846782684326, "epoch": 7.56, "learning_rate": 1.3539025077486617e-05, "loss": 0.7029, "step": 8947, "task_loss": 0.3806469440460205 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5350360870361328, "epoch": 7.56, "learning_rate": 1.3534328918944303e-05, "loss": 0.6174, "step": 8948, "task_loss": 0.5583010911941528 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.47263404726982117, "epoch": 7.56, "learning_rate": 1.3529632760401991e-05, "loss": 0.5574, "step": 8949, "task_loss": 0.19095216691493988 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5050381422042847, "epoch": 7.57, "learning_rate": 1.352493660185968e-05, "loss": 0.6579, "step": 8950, "task_loss": 1.0900251865386963 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7350224256515503, "epoch": 7.57, "learning_rate": 1.3520240443317367e-05, "loss": 0.7378, "step": 8951, "task_loss": 1.9819756746292114 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.35176029801368713, "epoch": 7.57, "learning_rate": 1.3515544284775055e-05, "loss": 0.5924, "step": 8952, "task_loss": 0.9507119059562683 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.9072763919830322, "epoch": 7.57, "learning_rate": 1.3510848126232742e-05, "loss": 0.5656, "step": 8953, "task_loss": 0.7344108819961548 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5861798524856567, "epoch": 7.57, "learning_rate": 1.3506151967690431e-05, "loss": 0.5776, "step": 8954, "task_loss": 1.2045481204986572 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4218946397304535, "epoch": 7.57, "learning_rate": 1.3501455809148118e-05, "loss": 0.5492, "step": 8955, "task_loss": 1.098868489265442 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6271092891693115, "epoch": 7.57, "learning_rate": 1.3496759650605806e-05, "loss": 0.5885, "step": 8956, "task_loss": 0.7869184613227844 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.9844666719436646, "epoch": 7.57, "learning_rate": 1.3492063492063492e-05, "loss": 0.6404, "step": 8957, "task_loss": 0.669703483581543 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.20546510815620422, "epoch": 7.57, "learning_rate": 1.3487367333521182e-05, "loss": 0.5522, "step": 8958, "task_loss": 0.43966707587242126 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.46099865436553955, "epoch": 7.57, "learning_rate": 1.3482671174978866e-05, "loss": 0.5629, "step": 8959, "task_loss": 0.4389518201351166 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7387072443962097, "epoch": 7.57, "learning_rate": 1.3477975016436556e-05, "loss": 0.6891, "step": 8960, "task_loss": 1.1003303527832031 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7470431327819824, "epoch": 7.57, "learning_rate": 1.3473278857894242e-05, "loss": 0.7838, "step": 8961, "task_loss": 0.5220003724098206 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.25619348883628845, "epoch": 7.58, "learning_rate": 1.346858269935193e-05, "loss": 0.5534, "step": 8962, "task_loss": 0.4513145387172699 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.9220412373542786, "epoch": 7.58, "learning_rate": 1.3463886540809617e-05, "loss": 0.6621, "step": 8963, "task_loss": 1.2315433025360107 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6066278219223022, "epoch": 7.58, "learning_rate": 1.3459190382267307e-05, "loss": 0.5635, "step": 8964, "task_loss": 0.4779108166694641 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7027988433837891, "epoch": 7.58, "learning_rate": 1.3454494223724995e-05, "loss": 0.5846, "step": 8965, "task_loss": 0.4471530616283417 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5344998240470886, "epoch": 7.58, "learning_rate": 1.3449798065182681e-05, "loss": 0.4655, "step": 8966, "task_loss": 0.6914742588996887 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6566864252090454, "epoch": 7.58, "learning_rate": 1.344510190664037e-05, "loss": 0.5817, "step": 8967, "task_loss": 0.768980085849762 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.417716383934021, "epoch": 7.58, "learning_rate": 1.3440405748098055e-05, "loss": 0.8569, "step": 8968, "task_loss": 1.143964171409607 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5764356255531311, "epoch": 7.58, "learning_rate": 1.3435709589555745e-05, "loss": 0.7695, "step": 8969, "task_loss": 0.27632227540016174 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4697914719581604, "epoch": 7.58, "learning_rate": 1.3431013431013431e-05, "loss": 0.6726, "step": 8970, "task_loss": 0.6204628944396973 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.2383044958114624, "epoch": 7.58, "learning_rate": 1.342631727247112e-05, "loss": 0.5566, "step": 8971, "task_loss": 0.012404450215399265 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4473098814487457, "epoch": 7.58, "learning_rate": 1.3421621113928806e-05, "loss": 0.5913, "step": 8972, "task_loss": 0.6144418120384216 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5039156079292297, "epoch": 7.58, "learning_rate": 1.3416924955386495e-05, "loss": 0.5507, "step": 8973, "task_loss": 0.5403667092323303 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.43870508670806885, "epoch": 7.59, "learning_rate": 1.3412228796844182e-05, "loss": 0.5339, "step": 8974, "task_loss": 0.17271222174167633 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.3637676239013672, "epoch": 7.59, "learning_rate": 1.340753263830187e-05, "loss": 0.8828, "step": 8975, "task_loss": 0.5955216884613037 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7214004397392273, "epoch": 7.59, "learning_rate": 1.3402836479759556e-05, "loss": 0.7546, "step": 8976, "task_loss": 0.924312949180603 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.2916887402534485, "epoch": 7.59, "learning_rate": 1.3398140321217246e-05, "loss": 0.648, "step": 8977, "task_loss": 0.41269224882125854 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7409454584121704, "epoch": 7.59, "learning_rate": 1.339344416267493e-05, "loss": 0.7126, "step": 8978, "task_loss": 1.1775206327438354 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.47395795583724976, "epoch": 7.59, "learning_rate": 1.338874800413262e-05, "loss": 0.7015, "step": 8979, "task_loss": 0.6879599094390869 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6251181364059448, "epoch": 7.59, "learning_rate": 1.3384051845590308e-05, "loss": 0.539, "step": 8980, "task_loss": 0.1875106394290924 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.3682768642902374, "epoch": 7.59, "learning_rate": 1.3379355687047995e-05, "loss": 0.4235, "step": 8981, "task_loss": 0.4374229311943054 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5918357372283936, "epoch": 7.59, "learning_rate": 1.3374659528505684e-05, "loss": 0.7663, "step": 8982, "task_loss": 0.6424500942230225 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7029181718826294, "epoch": 7.59, "learning_rate": 1.336996336996337e-05, "loss": 0.4818, "step": 8983, "task_loss": 0.41917601227760315 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6133747696876526, "epoch": 7.59, "learning_rate": 1.3365267211421059e-05, "loss": 0.6514, "step": 8984, "task_loss": 1.036465048789978 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4258459806442261, "epoch": 7.59, "learning_rate": 1.3360571052878745e-05, "loss": 0.4757, "step": 8985, "task_loss": 0.23279881477355957 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6525278687477112, "epoch": 7.6, "learning_rate": 1.3355874894336435e-05, "loss": 0.6549, "step": 8986, "task_loss": 0.3322206139564514 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4371565878391266, "epoch": 7.6, "learning_rate": 1.3351178735794121e-05, "loss": 0.5731, "step": 8987, "task_loss": 0.267018586397171 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.37684255838394165, "epoch": 7.6, "learning_rate": 1.334648257725181e-05, "loss": 0.6312, "step": 8988, "task_loss": 0.6275477409362793 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6850274801254272, "epoch": 7.6, "learning_rate": 1.3341786418709496e-05, "loss": 0.6525, "step": 8989, "task_loss": 0.6877949833869934 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6953070163726807, "epoch": 7.6, "learning_rate": 1.3337090260167184e-05, "loss": 0.6114, "step": 8990, "task_loss": 0.6951400637626648 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6871379613876343, "epoch": 7.6, "learning_rate": 1.333239410162487e-05, "loss": 0.6864, "step": 8991, "task_loss": 1.1365954875946045 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.649625837802887, "epoch": 7.6, "learning_rate": 1.332769794308256e-05, "loss": 0.4748, "step": 8992, "task_loss": 0.1356354057788849 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5677217245101929, "epoch": 7.6, "learning_rate": 1.3323001784540246e-05, "loss": 0.7222, "step": 8993, "task_loss": 0.6196795701980591 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.8794514536857605, "epoch": 7.6, "learning_rate": 1.3318305625997934e-05, "loss": 0.6241, "step": 8994, "task_loss": 0.9544141888618469 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.2140377759933472, "epoch": 7.6, "learning_rate": 1.3313609467455624e-05, "loss": 0.6514, "step": 8995, "task_loss": 1.1253108978271484 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.22128227353096008, "epoch": 7.6, "learning_rate": 1.330891330891331e-05, "loss": 0.4357, "step": 8996, "task_loss": 0.45132794976234436 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5807039141654968, "epoch": 7.6, "learning_rate": 1.3304217150370998e-05, "loss": 0.6397, "step": 8997, "task_loss": 0.49469056725502014 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5622422695159912, "epoch": 7.61, "learning_rate": 1.3299520991828684e-05, "loss": 0.526, "step": 8998, "task_loss": 0.6068680882453918 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.3791293799877167, "epoch": 7.61, "learning_rate": 1.3294824833286374e-05, "loss": 0.5031, "step": 8999, "task_loss": 0.44225290417671204 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.2813960313796997, "epoch": 7.61, "learning_rate": 1.3290128674744059e-05, "loss": 0.496, "step": 9000, "task_loss": 0.29683274030685425 }, { "epoch": 7.61, "eval_accuracy": 0.8981386138613862, "eval_loss": 0.41152384877204895, "eval_runtime": 225.3134, "eval_samples_per_second": 112.066, "eval_steps_per_second": 0.879, "step": 9000 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7307490110397339, "epoch": 7.61, "learning_rate": 1.3285432516201749e-05, "loss": 0.6148, "step": 9001, "task_loss": 0.966532289981842 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7366096377372742, "epoch": 7.61, "learning_rate": 1.3280736357659435e-05, "loss": 0.5278, "step": 9002, "task_loss": 0.47506794333457947 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.533441960811615, "epoch": 7.61, "learning_rate": 1.3276040199117123e-05, "loss": 0.7801, "step": 9003, "task_loss": 0.9930967688560486 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7346415519714355, "epoch": 7.61, "learning_rate": 1.327134404057481e-05, "loss": 0.7007, "step": 9004, "task_loss": 0.7331096529960632 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4591485261917114, "epoch": 7.61, "learning_rate": 1.3266647882032499e-05, "loss": 0.578, "step": 9005, "task_loss": 0.13209810853004456 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7664841413497925, "epoch": 7.61, "learning_rate": 1.3261951723490185e-05, "loss": 0.8004, "step": 9006, "task_loss": 0.4957949221134186 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.3065317869186401, "epoch": 7.61, "learning_rate": 1.3257255564947873e-05, "loss": 1.052, "step": 9007, "task_loss": 1.2621599435806274 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.36970430612564087, "epoch": 7.61, "learning_rate": 1.325255940640556e-05, "loss": 0.4144, "step": 9008, "task_loss": 0.5109555125236511 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5257435441017151, "epoch": 7.61, "learning_rate": 1.324786324786325e-05, "loss": 0.6924, "step": 9009, "task_loss": 0.7217450141906738 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.47787120938301086, "epoch": 7.62, "learning_rate": 1.3243167089320934e-05, "loss": 0.5521, "step": 9010, "task_loss": 0.8045516014099121 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.44429051876068115, "epoch": 7.62, "learning_rate": 1.3238470930778624e-05, "loss": 0.5788, "step": 9011, "task_loss": 1.1146248579025269 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7454016208648682, "epoch": 7.62, "learning_rate": 1.3233774772236312e-05, "loss": 0.7014, "step": 9012, "task_loss": 1.4478458166122437 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7985577583312988, "epoch": 7.62, "learning_rate": 1.3229078613693998e-05, "loss": 0.8142, "step": 9013, "task_loss": 1.3632134199142456 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7632204294204712, "epoch": 7.62, "learning_rate": 1.3224382455151688e-05, "loss": 0.6445, "step": 9014, "task_loss": 0.5585616230964661 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7599582672119141, "epoch": 7.62, "learning_rate": 1.3219686296609374e-05, "loss": 0.6155, "step": 9015, "task_loss": 1.4278753995895386 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6794182062149048, "epoch": 7.62, "learning_rate": 1.3214990138067062e-05, "loss": 0.5356, "step": 9016, "task_loss": 1.3235254287719727 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.40870916843414307, "epoch": 7.62, "learning_rate": 1.3210293979524749e-05, "loss": 0.5948, "step": 9017, "task_loss": 0.27083802223205566 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4331333041191101, "epoch": 7.62, "learning_rate": 1.3205597820982438e-05, "loss": 0.6179, "step": 9018, "task_loss": 0.5020532608032227 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6030506491661072, "epoch": 7.62, "learning_rate": 1.3200901662440125e-05, "loss": 0.5212, "step": 9019, "task_loss": 0.9635381698608398 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6907272338867188, "epoch": 7.62, "learning_rate": 1.3196205503897813e-05, "loss": 0.5526, "step": 9020, "task_loss": 0.7278802990913391 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6707903146743774, "epoch": 7.63, "learning_rate": 1.3191509345355499e-05, "loss": 0.5338, "step": 9021, "task_loss": 1.2371171712875366 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.9012331962585449, "epoch": 7.63, "learning_rate": 1.3186813186813187e-05, "loss": 0.6654, "step": 9022, "task_loss": 0.46501392126083374 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.8501212000846863, "epoch": 7.63, "learning_rate": 1.3182117028270873e-05, "loss": 0.7624, "step": 9023, "task_loss": 1.4143824577331543 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.659072756767273, "epoch": 7.63, "learning_rate": 1.3177420869728563e-05, "loss": 0.6648, "step": 9024, "task_loss": 1.3253391981124878 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7346950769424438, "epoch": 7.63, "learning_rate": 1.317272471118625e-05, "loss": 0.7094, "step": 9025, "task_loss": 0.8217813372612 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6128737926483154, "epoch": 7.63, "learning_rate": 1.3168028552643938e-05, "loss": 0.5558, "step": 9026, "task_loss": 0.7849315404891968 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5093212127685547, "epoch": 7.63, "learning_rate": 1.3163332394101627e-05, "loss": 0.8112, "step": 9027, "task_loss": 0.7542285323143005 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5931057929992676, "epoch": 7.63, "learning_rate": 1.3158636235559314e-05, "loss": 0.5912, "step": 9028, "task_loss": 0.3304416537284851 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.3930579125881195, "epoch": 7.63, "learning_rate": 1.3153940077017002e-05, "loss": 0.537, "step": 9029, "task_loss": 0.412393182516098 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5463581681251526, "epoch": 7.63, "learning_rate": 1.3149243918474688e-05, "loss": 0.6033, "step": 9030, "task_loss": 1.3231762647628784 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.3858368396759033, "epoch": 7.63, "learning_rate": 1.3144547759932378e-05, "loss": 0.5303, "step": 9031, "task_loss": 0.315184623003006 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5829121470451355, "epoch": 7.63, "learning_rate": 1.3139851601390062e-05, "loss": 0.716, "step": 9032, "task_loss": 0.6660099625587463 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5190823078155518, "epoch": 7.64, "learning_rate": 1.3135155442847752e-05, "loss": 0.7032, "step": 9033, "task_loss": 0.49052760004997253 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.37749361991882324, "epoch": 7.64, "learning_rate": 1.3130459284305438e-05, "loss": 0.5764, "step": 9034, "task_loss": 0.10549497604370117 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5722342133522034, "epoch": 7.64, "learning_rate": 1.3125763125763126e-05, "loss": 0.6668, "step": 9035, "task_loss": 0.12600325047969818 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6511390209197998, "epoch": 7.64, "learning_rate": 1.3121066967220813e-05, "loss": 0.5532, "step": 9036, "task_loss": 0.061101507395505905 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6330512166023254, "epoch": 7.64, "learning_rate": 1.3116370808678502e-05, "loss": 0.5864, "step": 9037, "task_loss": 0.4041541516780853 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5505348443984985, "epoch": 7.64, "learning_rate": 1.3111674650136189e-05, "loss": 0.534, "step": 9038, "task_loss": 1.0301940441131592 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.40512585639953613, "epoch": 7.64, "learning_rate": 1.3106978491593877e-05, "loss": 0.7018, "step": 9039, "task_loss": 0.6876006722450256 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.34805920720100403, "epoch": 7.64, "learning_rate": 1.3102282333051563e-05, "loss": 0.5758, "step": 9040, "task_loss": 0.7382710576057434 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6497393846511841, "epoch": 7.64, "learning_rate": 1.3097586174509251e-05, "loss": 0.7256, "step": 9041, "task_loss": 0.7922466397285461 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.9173288941383362, "epoch": 7.64, "learning_rate": 1.3092890015966941e-05, "loss": 0.844, "step": 9042, "task_loss": 0.8858552575111389 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4481070339679718, "epoch": 7.64, "learning_rate": 1.3088193857424627e-05, "loss": 0.5436, "step": 9043, "task_loss": 0.7812582850456238 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5777219533920288, "epoch": 7.64, "learning_rate": 1.3083497698882315e-05, "loss": 0.5799, "step": 9044, "task_loss": 1.7439266443252563 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.9062776565551758, "epoch": 7.65, "learning_rate": 1.3078801540340002e-05, "loss": 0.6698, "step": 9045, "task_loss": 1.6864441633224487 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5858691930770874, "epoch": 7.65, "learning_rate": 1.3074105381797691e-05, "loss": 0.6701, "step": 9046, "task_loss": 1.0712456703186035 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5898187160491943, "epoch": 7.65, "learning_rate": 1.3069409223255378e-05, "loss": 0.5469, "step": 9047, "task_loss": 0.5957937240600586 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6014208793640137, "epoch": 7.65, "learning_rate": 1.3064713064713066e-05, "loss": 0.5643, "step": 9048, "task_loss": 0.8048995733261108 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.9950630068778992, "epoch": 7.65, "learning_rate": 1.3060016906170752e-05, "loss": 0.6559, "step": 9049, "task_loss": 0.7754375338554382 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4926382303237915, "epoch": 7.65, "learning_rate": 1.3055320747628442e-05, "loss": 0.535, "step": 9050, "task_loss": 0.26669442653656006 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.8775556683540344, "epoch": 7.65, "learning_rate": 1.3050624589086126e-05, "loss": 0.5539, "step": 9051, "task_loss": 0.9274231195449829 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5096705555915833, "epoch": 7.65, "learning_rate": 1.3045928430543816e-05, "loss": 0.581, "step": 9052, "task_loss": 0.21778860688209534 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5683337450027466, "epoch": 7.65, "learning_rate": 1.3041232272001503e-05, "loss": 0.5138, "step": 9053, "task_loss": 0.7031169533729553 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4617474675178528, "epoch": 7.65, "learning_rate": 1.303653611345919e-05, "loss": 0.5944, "step": 9054, "task_loss": 0.8714993596076965 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.42280957102775574, "epoch": 7.65, "learning_rate": 1.3031839954916877e-05, "loss": 0.5865, "step": 9055, "task_loss": 0.6723194122314453 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5408577919006348, "epoch": 7.65, "learning_rate": 1.3027143796374567e-05, "loss": 0.5282, "step": 9056, "task_loss": 0.861878514289856 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.8703663945198059, "epoch": 7.66, "learning_rate": 1.3022447637832255e-05, "loss": 0.6433, "step": 9057, "task_loss": 0.8380740284919739 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5695338845252991, "epoch": 7.66, "learning_rate": 1.3017751479289941e-05, "loss": 0.6059, "step": 9058, "task_loss": 0.5634458065032959 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6485887765884399, "epoch": 7.66, "learning_rate": 1.301305532074763e-05, "loss": 0.5838, "step": 9059, "task_loss": 0.19843529164791107 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.580459713935852, "epoch": 7.66, "learning_rate": 1.3008359162205317e-05, "loss": 0.5649, "step": 9060, "task_loss": 0.4084705710411072 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6998453140258789, "epoch": 7.66, "learning_rate": 1.3003663003663005e-05, "loss": 0.5327, "step": 9061, "task_loss": 0.5208227634429932 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5457407832145691, "epoch": 7.66, "learning_rate": 1.2998966845120691e-05, "loss": 0.6063, "step": 9062, "task_loss": 0.47631439566612244 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.863166332244873, "epoch": 7.66, "learning_rate": 1.299427068657838e-05, "loss": 0.8222, "step": 9063, "task_loss": 1.0228097438812256 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.8334242105484009, "epoch": 7.66, "learning_rate": 1.2989574528036066e-05, "loss": 0.7328, "step": 9064, "task_loss": 1.5601552724838257 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.9794629812240601, "epoch": 7.66, "learning_rate": 1.2984878369493756e-05, "loss": 0.7113, "step": 9065, "task_loss": 0.771183431148529 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.43146729469299316, "epoch": 7.66, "learning_rate": 1.2980182210951442e-05, "loss": 0.605, "step": 9066, "task_loss": 0.6143893003463745 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7484097480773926, "epoch": 7.66, "learning_rate": 1.297548605240913e-05, "loss": 0.7027, "step": 9067, "task_loss": 1.0117928981781006 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.0944373607635498, "epoch": 7.66, "learning_rate": 1.2970789893866816e-05, "loss": 0.7084, "step": 9068, "task_loss": 2.175537347793579 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.9182159304618835, "epoch": 7.67, "learning_rate": 1.2966093735324506e-05, "loss": 0.68, "step": 9069, "task_loss": 0.17643077671527863 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.127352237701416, "epoch": 7.67, "learning_rate": 1.2961397576782192e-05, "loss": 0.7136, "step": 9070, "task_loss": 0.9668977856636047 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6797225475311279, "epoch": 7.67, "learning_rate": 1.295670141823988e-05, "loss": 0.5786, "step": 9071, "task_loss": 1.035664439201355 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7414901256561279, "epoch": 7.67, "learning_rate": 1.295200525969757e-05, "loss": 0.5827, "step": 9072, "task_loss": 0.47591114044189453 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7650946378707886, "epoch": 7.67, "learning_rate": 1.2947309101155255e-05, "loss": 0.7814, "step": 9073, "task_loss": 0.4345983862876892 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6888265609741211, "epoch": 7.67, "learning_rate": 1.2942612942612944e-05, "loss": 0.695, "step": 9074, "task_loss": 0.7842326164245605 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5396343469619751, "epoch": 7.67, "learning_rate": 1.293791678407063e-05, "loss": 0.6499, "step": 9075, "task_loss": 0.781366765499115 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7076486945152283, "epoch": 7.67, "learning_rate": 1.2933220625528319e-05, "loss": 0.7921, "step": 9076, "task_loss": 0.8562763929367065 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6688251495361328, "epoch": 7.67, "learning_rate": 1.2928524466986005e-05, "loss": 0.8087, "step": 9077, "task_loss": 1.161331057548523 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6165298223495483, "epoch": 7.67, "learning_rate": 1.2923828308443695e-05, "loss": 0.6788, "step": 9078, "task_loss": 0.8113792538642883 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.9365557432174683, "epoch": 7.67, "learning_rate": 1.2919132149901381e-05, "loss": 0.9046, "step": 9079, "task_loss": 1.1113063097000122 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5746587514877319, "epoch": 7.67, "learning_rate": 1.291443599135907e-05, "loss": 0.5086, "step": 9080, "task_loss": 0.25513365864753723 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.722360372543335, "epoch": 7.68, "learning_rate": 1.2909739832816756e-05, "loss": 0.5339, "step": 9081, "task_loss": 1.0169212818145752 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5195110440254211, "epoch": 7.68, "learning_rate": 1.2905043674274445e-05, "loss": 0.3984, "step": 9082, "task_loss": 0.40439295768737793 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5156396627426147, "epoch": 7.68, "learning_rate": 1.290034751573213e-05, "loss": 0.6077, "step": 9083, "task_loss": 1.0731475353240967 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5853645205497742, "epoch": 7.68, "learning_rate": 1.289565135718982e-05, "loss": 0.6383, "step": 9084, "task_loss": 1.2003706693649292 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4770641624927521, "epoch": 7.68, "learning_rate": 1.2890955198647506e-05, "loss": 0.6225, "step": 9085, "task_loss": 0.6634112596511841 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5818284153938293, "epoch": 7.68, "learning_rate": 1.2886259040105194e-05, "loss": 0.5852, "step": 9086, "task_loss": 0.9344972968101501 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.8461856245994568, "epoch": 7.68, "learning_rate": 1.288156288156288e-05, "loss": 0.8108, "step": 9087, "task_loss": 0.39087262749671936 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5091992616653442, "epoch": 7.68, "learning_rate": 1.287686672302057e-05, "loss": 0.6821, "step": 9088, "task_loss": 0.7031358480453491 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5288878679275513, "epoch": 7.68, "learning_rate": 1.2872170564478258e-05, "loss": 0.5358, "step": 9089, "task_loss": 0.4093533456325531 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.42600560188293457, "epoch": 7.68, "learning_rate": 1.2867474405935945e-05, "loss": 0.5062, "step": 9090, "task_loss": 0.409268319606781 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6948527097702026, "epoch": 7.68, "learning_rate": 1.2862778247393634e-05, "loss": 0.6799, "step": 9091, "task_loss": 1.1037144660949707 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.48970529437065125, "epoch": 7.69, "learning_rate": 1.285808208885132e-05, "loss": 0.4723, "step": 9092, "task_loss": 0.9574061632156372 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7055870294570923, "epoch": 7.69, "learning_rate": 1.2853385930309009e-05, "loss": 0.8125, "step": 9093, "task_loss": 1.5533785820007324 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6166523694992065, "epoch": 7.69, "learning_rate": 1.2848689771766695e-05, "loss": 0.4769, "step": 9094, "task_loss": 0.5199586749076843 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7846020460128784, "epoch": 7.69, "learning_rate": 1.2843993613224383e-05, "loss": 0.6677, "step": 9095, "task_loss": 0.3481350541114807 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6348507404327393, "epoch": 7.69, "learning_rate": 1.283929745468207e-05, "loss": 0.7002, "step": 9096, "task_loss": 0.101246677339077 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7383549213409424, "epoch": 7.69, "learning_rate": 1.2834601296139759e-05, "loss": 0.7325, "step": 9097, "task_loss": 0.6932706236839294 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6064481735229492, "epoch": 7.69, "learning_rate": 1.2829905137597445e-05, "loss": 0.6677, "step": 9098, "task_loss": 0.9959902763366699 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7460986375808716, "epoch": 7.69, "learning_rate": 1.2825208979055133e-05, "loss": 0.8771, "step": 9099, "task_loss": 0.35735008120536804 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.31088483333587646, "epoch": 7.69, "learning_rate": 1.282051282051282e-05, "loss": 0.4977, "step": 9100, "task_loss": 0.41531816124916077 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4399469196796417, "epoch": 7.69, "learning_rate": 1.281581666197051e-05, "loss": 0.4764, "step": 9101, "task_loss": 0.2638051509857178 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5483927726745605, "epoch": 7.69, "learning_rate": 1.2811120503428194e-05, "loss": 0.7473, "step": 9102, "task_loss": 0.3054583966732025 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.8244616389274597, "epoch": 7.69, "learning_rate": 1.2806424344885884e-05, "loss": 0.6994, "step": 9103, "task_loss": 1.0471986532211304 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.3091169595718384, "epoch": 7.7, "learning_rate": 1.2801728186343574e-05, "loss": 0.6471, "step": 9104, "task_loss": 1.2121503353118896 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5193596482276917, "epoch": 7.7, "learning_rate": 1.2797032027801258e-05, "loss": 0.4325, "step": 9105, "task_loss": 0.22953557968139648 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.3810580372810364, "epoch": 7.7, "learning_rate": 1.2792335869258948e-05, "loss": 0.5, "step": 9106, "task_loss": 0.4794789254665375 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.905901312828064, "epoch": 7.7, "learning_rate": 1.2787639710716634e-05, "loss": 0.674, "step": 9107, "task_loss": 0.8848867416381836 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6959438920021057, "epoch": 7.7, "learning_rate": 1.2782943552174322e-05, "loss": 0.6946, "step": 9108, "task_loss": 0.8566098213195801 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6713850498199463, "epoch": 7.7, "learning_rate": 1.2778247393632009e-05, "loss": 0.6673, "step": 9109, "task_loss": 0.4564402997493744 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.0372607707977295, "epoch": 7.7, "learning_rate": 1.2773551235089698e-05, "loss": 0.8259, "step": 9110, "task_loss": 1.082775354385376 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.36567747592926025, "epoch": 7.7, "learning_rate": 1.2768855076547385e-05, "loss": 0.639, "step": 9111, "task_loss": 0.8418704867362976 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5722159743309021, "epoch": 7.7, "learning_rate": 1.2764158918005073e-05, "loss": 0.4769, "step": 9112, "task_loss": 0.3902459144592285 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.3350951075553894, "epoch": 7.7, "learning_rate": 1.275946275946276e-05, "loss": 0.3293, "step": 9113, "task_loss": 0.6242761015892029 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.48244649171829224, "epoch": 7.7, "learning_rate": 1.2754766600920449e-05, "loss": 0.6871, "step": 9114, "task_loss": 0.839927613735199 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.41007760167121887, "epoch": 7.7, "learning_rate": 1.2750070442378134e-05, "loss": 0.6077, "step": 9115, "task_loss": 1.0131207704544067 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.44290614128112793, "epoch": 7.71, "learning_rate": 1.2745374283835823e-05, "loss": 0.5402, "step": 9116, "task_loss": 0.2894012928009033 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5615799427032471, "epoch": 7.71, "learning_rate": 1.274067812529351e-05, "loss": 0.5994, "step": 9117, "task_loss": 0.7510315179824829 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.43828704953193665, "epoch": 7.71, "learning_rate": 1.2735981966751198e-05, "loss": 0.6726, "step": 9118, "task_loss": 0.8060537576675415 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.615739643573761, "epoch": 7.71, "learning_rate": 1.2731285808208887e-05, "loss": 0.7048, "step": 9119, "task_loss": 0.24548418819904327 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5567545890808105, "epoch": 7.71, "learning_rate": 1.2726589649666574e-05, "loss": 0.5638, "step": 9120, "task_loss": 0.4917210340499878 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7014592885971069, "epoch": 7.71, "learning_rate": 1.2721893491124262e-05, "loss": 0.5981, "step": 9121, "task_loss": 1.6593323945999146 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.580632209777832, "epoch": 7.71, "learning_rate": 1.2717197332581948e-05, "loss": 0.6642, "step": 9122, "task_loss": 0.806998610496521 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4691159725189209, "epoch": 7.71, "learning_rate": 1.2712501174039638e-05, "loss": 0.5977, "step": 9123, "task_loss": 1.0986748933792114 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5824064612388611, "epoch": 7.71, "learning_rate": 1.2707805015497322e-05, "loss": 0.621, "step": 9124, "task_loss": 1.3755930662155151 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.24288761615753174, "epoch": 7.71, "learning_rate": 1.2703108856955012e-05, "loss": 0.5625, "step": 9125, "task_loss": 0.051154494285583496 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4740070700645447, "epoch": 7.71, "learning_rate": 1.2698412698412699e-05, "loss": 0.5192, "step": 9126, "task_loss": 0.4311660826206207 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7269397974014282, "epoch": 7.71, "learning_rate": 1.2693716539870387e-05, "loss": 0.6431, "step": 9127, "task_loss": 0.9512178897857666 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5863138437271118, "epoch": 7.72, "learning_rate": 1.2689020381328073e-05, "loss": 0.7079, "step": 9128, "task_loss": 1.4563648700714111 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.39926332235336304, "epoch": 7.72, "learning_rate": 1.2684324222785763e-05, "loss": 0.5599, "step": 9129, "task_loss": 1.1902014017105103 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4026908278465271, "epoch": 7.72, "learning_rate": 1.2679628064243449e-05, "loss": 0.6192, "step": 9130, "task_loss": 0.7239860892295837 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4563809335231781, "epoch": 7.72, "learning_rate": 1.2674931905701137e-05, "loss": 0.7672, "step": 9131, "task_loss": 0.3404676020145416 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7566729784011841, "epoch": 7.72, "learning_rate": 1.2670235747158823e-05, "loss": 0.6782, "step": 9132, "task_loss": 0.42687010765075684 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.18166440725326538, "epoch": 7.72, "learning_rate": 1.2665539588616513e-05, "loss": 0.4507, "step": 9133, "task_loss": 0.036593835800886154 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4890053868293762, "epoch": 7.72, "learning_rate": 1.2660843430074201e-05, "loss": 0.7933, "step": 9134, "task_loss": 1.1616226434707642 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5537952780723572, "epoch": 7.72, "learning_rate": 1.2656147271531887e-05, "loss": 0.5149, "step": 9135, "task_loss": 0.6069802045822144 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5156192779541016, "epoch": 7.72, "learning_rate": 1.2651451112989575e-05, "loss": 0.5399, "step": 9136, "task_loss": 1.1199874877929688 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4933267831802368, "epoch": 7.72, "learning_rate": 1.2646754954447262e-05, "loss": 0.6113, "step": 9137, "task_loss": 0.4185149073600769 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7207887172698975, "epoch": 7.72, "learning_rate": 1.2642058795904952e-05, "loss": 0.6317, "step": 9138, "task_loss": 0.6087276339530945 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.705646276473999, "epoch": 7.72, "learning_rate": 1.2637362637362638e-05, "loss": 0.8563, "step": 9139, "task_loss": 1.012412667274475 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5746712684631348, "epoch": 7.73, "learning_rate": 1.2632666478820326e-05, "loss": 0.666, "step": 9140, "task_loss": 1.0685789585113525 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.43536290526390076, "epoch": 7.73, "learning_rate": 1.2627970320278012e-05, "loss": 0.5447, "step": 9141, "task_loss": 0.5452060103416443 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5394901633262634, "epoch": 7.73, "learning_rate": 1.2623274161735702e-05, "loss": 0.6106, "step": 9142, "task_loss": 1.300814151763916 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.9475088119506836, "epoch": 7.73, "learning_rate": 1.2618578003193388e-05, "loss": 0.8658, "step": 9143, "task_loss": 1.3723053932189941 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.33425259590148926, "epoch": 7.73, "learning_rate": 1.2613881844651076e-05, "loss": 0.5864, "step": 9144, "task_loss": 0.7204646468162537 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7013534307479858, "epoch": 7.73, "learning_rate": 1.2609185686108763e-05, "loss": 0.5867, "step": 9145, "task_loss": 1.0429000854492188 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.3698098063468933, "epoch": 7.73, "learning_rate": 1.260448952756645e-05, "loss": 0.4618, "step": 9146, "task_loss": 0.3355768322944641 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7327256202697754, "epoch": 7.73, "learning_rate": 1.2599793369024137e-05, "loss": 0.5611, "step": 9147, "task_loss": 0.5533397197723389 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5181212425231934, "epoch": 7.73, "learning_rate": 1.2595097210481827e-05, "loss": 0.5995, "step": 9148, "task_loss": 1.3067476749420166 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5996986031532288, "epoch": 7.73, "learning_rate": 1.2590401051939515e-05, "loss": 0.7406, "step": 9149, "task_loss": 0.5035154819488525 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6594605445861816, "epoch": 7.73, "learning_rate": 1.2585704893397201e-05, "loss": 0.7067, "step": 9150, "task_loss": 1.3502967357635498 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.0735517740249634, "epoch": 7.73, "learning_rate": 1.2581008734854891e-05, "loss": 0.8276, "step": 9151, "task_loss": 1.7753902673721313 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6830829381942749, "epoch": 7.74, "learning_rate": 1.2576312576312577e-05, "loss": 0.5472, "step": 9152, "task_loss": 1.046669363975525 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6330386400222778, "epoch": 7.74, "learning_rate": 1.2571616417770265e-05, "loss": 0.5793, "step": 9153, "task_loss": 0.5685707330703735 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.3661718964576721, "epoch": 7.74, "learning_rate": 1.2566920259227952e-05, "loss": 0.569, "step": 9154, "task_loss": 0.6226862668991089 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.3578072190284729, "epoch": 7.74, "learning_rate": 1.2562224100685641e-05, "loss": 0.7324, "step": 9155, "task_loss": 0.4164118468761444 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4766775667667389, "epoch": 7.74, "learning_rate": 1.2557527942143326e-05, "loss": 0.5271, "step": 9156, "task_loss": 0.6421356201171875 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5595275163650513, "epoch": 7.74, "learning_rate": 1.2552831783601016e-05, "loss": 0.5607, "step": 9157, "task_loss": 1.1562862396240234 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.46680909395217896, "epoch": 7.74, "learning_rate": 1.2548135625058702e-05, "loss": 0.5917, "step": 9158, "task_loss": 1.1353721618652344 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6528694033622742, "epoch": 7.74, "learning_rate": 1.254343946651639e-05, "loss": 0.6154, "step": 9159, "task_loss": 0.8469077348709106 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.3327333927154541, "epoch": 7.74, "learning_rate": 1.2538743307974076e-05, "loss": 0.525, "step": 9160, "task_loss": 0.3638368248939514 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6519312858581543, "epoch": 7.74, "learning_rate": 1.2534047149431766e-05, "loss": 0.5975, "step": 9161, "task_loss": 0.9959648251533508 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7973043918609619, "epoch": 7.74, "learning_rate": 1.2529350990889453e-05, "loss": 0.6715, "step": 9162, "task_loss": 2.1171114444732666 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5737801790237427, "epoch": 7.75, "learning_rate": 1.252465483234714e-05, "loss": 0.5112, "step": 9163, "task_loss": 0.6324585676193237 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5482625961303711, "epoch": 7.75, "learning_rate": 1.2519958673804827e-05, "loss": 0.564, "step": 9164, "task_loss": 1.3259679079055786 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.9230426549911499, "epoch": 7.75, "learning_rate": 1.2515262515262517e-05, "loss": 0.7227, "step": 9165, "task_loss": 0.6075318455696106 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.47031185030937195, "epoch": 7.75, "learning_rate": 1.2510566356720205e-05, "loss": 0.4521, "step": 9166, "task_loss": 0.5812268257141113 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.9825012683868408, "epoch": 7.75, "learning_rate": 1.2505870198177891e-05, "loss": 0.6809, "step": 9167, "task_loss": 1.353095531463623 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7288048267364502, "epoch": 7.75, "learning_rate": 1.2501174039635579e-05, "loss": 0.6443, "step": 9168, "task_loss": 0.7890456914901733 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4178343117237091, "epoch": 7.75, "learning_rate": 1.2496477881093265e-05, "loss": 0.4821, "step": 9169, "task_loss": 0.31060272455215454 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4882894456386566, "epoch": 7.75, "learning_rate": 1.2491781722550953e-05, "loss": 0.5343, "step": 9170, "task_loss": 0.662736177444458 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5568426847457886, "epoch": 7.75, "learning_rate": 1.2487085564008641e-05, "loss": 0.7393, "step": 9171, "task_loss": 0.9950714111328125 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7702128291130066, "epoch": 7.75, "learning_rate": 1.2482389405466328e-05, "loss": 0.7508, "step": 9172, "task_loss": 0.5499085783958435 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6226160526275635, "epoch": 7.75, "learning_rate": 1.2477693246924017e-05, "loss": 0.6324, "step": 9173, "task_loss": 0.19818687438964844 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.8429038524627686, "epoch": 7.75, "learning_rate": 1.2472997088381706e-05, "loss": 0.6864, "step": 9174, "task_loss": 0.8326199650764465 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6501444578170776, "epoch": 7.76, "learning_rate": 1.2468300929839392e-05, "loss": 0.5998, "step": 9175, "task_loss": 0.8628482222557068 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.9112529754638672, "epoch": 7.76, "learning_rate": 1.246360477129708e-05, "loss": 0.6751, "step": 9176, "task_loss": 0.5033800601959229 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.559431791305542, "epoch": 7.76, "learning_rate": 1.2458908612754768e-05, "loss": 0.5565, "step": 9177, "task_loss": 1.1971118450164795 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4931434392929077, "epoch": 7.76, "learning_rate": 1.2454212454212454e-05, "loss": 0.5114, "step": 9178, "task_loss": 0.7337039709091187 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.48618826270103455, "epoch": 7.76, "learning_rate": 1.2449516295670142e-05, "loss": 0.6297, "step": 9179, "task_loss": 0.47725483775138855 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.8665053844451904, "epoch": 7.76, "learning_rate": 1.244482013712783e-05, "loss": 0.7606, "step": 9180, "task_loss": 0.9061046838760376 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4813166558742523, "epoch": 7.76, "learning_rate": 1.2440123978585518e-05, "loss": 0.6378, "step": 9181, "task_loss": 0.9006826877593994 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.32420799136161804, "epoch": 7.76, "learning_rate": 1.2435427820043205e-05, "loss": 0.4714, "step": 9182, "task_loss": 0.10279767960309982 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6458555459976196, "epoch": 7.76, "learning_rate": 1.2430731661500893e-05, "loss": 0.7872, "step": 9183, "task_loss": 0.34597325325012207 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6857365369796753, "epoch": 7.76, "learning_rate": 1.242603550295858e-05, "loss": 0.5362, "step": 9184, "task_loss": 0.8836653232574463 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5644246339797974, "epoch": 7.76, "learning_rate": 1.2421339344416267e-05, "loss": 0.6422, "step": 9185, "task_loss": 0.2629653215408325 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4994243383407593, "epoch": 7.76, "learning_rate": 1.2416643185873955e-05, "loss": 0.5712, "step": 9186, "task_loss": 1.4908876419067383 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5470830202102661, "epoch": 7.77, "learning_rate": 1.2411947027331643e-05, "loss": 0.6192, "step": 9187, "task_loss": 0.2291332185268402 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4141067862510681, "epoch": 7.77, "learning_rate": 1.2407250868789331e-05, "loss": 0.498, "step": 9188, "task_loss": 0.3309977650642395 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7059606909751892, "epoch": 7.77, "learning_rate": 1.240255471024702e-05, "loss": 0.543, "step": 9189, "task_loss": 1.2484396696090698 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6303499937057495, "epoch": 7.77, "learning_rate": 1.2397858551704707e-05, "loss": 0.6173, "step": 9190, "task_loss": 0.7463486790657043 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.1179356575012207, "epoch": 7.77, "learning_rate": 1.2393162393162394e-05, "loss": 0.6472, "step": 9191, "task_loss": 1.4096165895462036 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.3600737452507019, "epoch": 7.77, "learning_rate": 1.2388466234620082e-05, "loss": 0.5546, "step": 9192, "task_loss": 0.8780907392501831 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.8203255534172058, "epoch": 7.77, "learning_rate": 1.238377007607777e-05, "loss": 0.6403, "step": 9193, "task_loss": 1.1304914951324463 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7810653448104858, "epoch": 7.77, "learning_rate": 1.2379073917535456e-05, "loss": 0.649, "step": 9194, "task_loss": 1.6029305458068848 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.38124603033065796, "epoch": 7.77, "learning_rate": 1.2374377758993144e-05, "loss": 0.629, "step": 9195, "task_loss": 0.05848316103219986 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5188174843788147, "epoch": 7.77, "learning_rate": 1.2369681600450832e-05, "loss": 0.5927, "step": 9196, "task_loss": 0.5620988011360168 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7826317548751831, "epoch": 7.77, "learning_rate": 1.2364985441908518e-05, "loss": 0.6421, "step": 9197, "task_loss": 1.175435185432434 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.43062925338745117, "epoch": 7.77, "learning_rate": 1.2360289283366206e-05, "loss": 0.5141, "step": 9198, "task_loss": 0.4835112392902374 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5537911057472229, "epoch": 7.78, "learning_rate": 1.2355593124823895e-05, "loss": 0.5606, "step": 9199, "task_loss": 0.8834670186042786 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5750342607498169, "epoch": 7.78, "learning_rate": 1.2350896966281583e-05, "loss": 0.5963, "step": 9200, "task_loss": 1.5788229703903198 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6749097108840942, "epoch": 7.78, "learning_rate": 1.2346200807739269e-05, "loss": 0.8095, "step": 9201, "task_loss": 1.3601738214492798 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.9442257881164551, "epoch": 7.78, "learning_rate": 1.2341504649196957e-05, "loss": 0.7306, "step": 9202, "task_loss": 0.9562928676605225 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5915853381156921, "epoch": 7.78, "learning_rate": 1.2336808490654645e-05, "loss": 0.622, "step": 9203, "task_loss": 1.65479576587677 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4696558117866516, "epoch": 7.78, "learning_rate": 1.2332112332112333e-05, "loss": 0.6024, "step": 9204, "task_loss": 0.9211612343788147 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7393296957015991, "epoch": 7.78, "learning_rate": 1.2327416173570021e-05, "loss": 0.6212, "step": 9205, "task_loss": 0.69806307554245 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6790538430213928, "epoch": 7.78, "learning_rate": 1.2322720015027709e-05, "loss": 0.7038, "step": 9206, "task_loss": 0.47628894448280334 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5161414742469788, "epoch": 7.78, "learning_rate": 1.2318023856485395e-05, "loss": 0.5931, "step": 9207, "task_loss": 0.6558088064193726 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4002099335193634, "epoch": 7.78, "learning_rate": 1.2313327697943083e-05, "loss": 0.6474, "step": 9208, "task_loss": 0.47103366255760193 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.46449607610702515, "epoch": 7.78, "learning_rate": 1.2308631539400771e-05, "loss": 0.632, "step": 9209, "task_loss": 0.21072576940059662 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.334917277097702, "epoch": 7.78, "learning_rate": 1.2303935380858458e-05, "loss": 0.5512, "step": 9210, "task_loss": 1.0161563158035278 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4747670888900757, "epoch": 7.79, "learning_rate": 1.2299239222316146e-05, "loss": 0.5291, "step": 9211, "task_loss": 0.663444459438324 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7980385422706604, "epoch": 7.79, "learning_rate": 1.2294543063773834e-05, "loss": 0.8725, "step": 9212, "task_loss": 1.548295021057129 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.40648791193962097, "epoch": 7.79, "learning_rate": 1.228984690523152e-05, "loss": 0.5279, "step": 9213, "task_loss": 0.6408501863479614 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.46807074546813965, "epoch": 7.79, "learning_rate": 1.2285150746689208e-05, "loss": 0.6428, "step": 9214, "task_loss": 0.21138949692249298 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4050152003765106, "epoch": 7.79, "learning_rate": 1.2280454588146896e-05, "loss": 0.4776, "step": 9215, "task_loss": 0.8811978101730347 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5405423641204834, "epoch": 7.79, "learning_rate": 1.2275758429604584e-05, "loss": 0.634, "step": 9216, "task_loss": 0.6027246713638306 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5813490748405457, "epoch": 7.79, "learning_rate": 1.227106227106227e-05, "loss": 0.5985, "step": 9217, "task_loss": 0.8912213444709778 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.9439423084259033, "epoch": 7.79, "learning_rate": 1.2266366112519959e-05, "loss": 0.6345, "step": 9218, "task_loss": 1.637570858001709 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.3715919554233551, "epoch": 7.79, "learning_rate": 1.2261669953977647e-05, "loss": 0.6343, "step": 9219, "task_loss": 0.49685850739479065 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6585158109664917, "epoch": 7.79, "learning_rate": 1.2256973795435335e-05, "loss": 0.5348, "step": 9220, "task_loss": 0.808984637260437 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5752423405647278, "epoch": 7.79, "learning_rate": 1.2252277636893023e-05, "loss": 0.7123, "step": 9221, "task_loss": 0.29104629158973694 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4112965166568756, "epoch": 7.79, "learning_rate": 1.224758147835071e-05, "loss": 0.6571, "step": 9222, "task_loss": 0.479967325925827 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7448669672012329, "epoch": 7.8, "learning_rate": 1.2242885319808397e-05, "loss": 0.5892, "step": 9223, "task_loss": 0.7633129358291626 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.3633204996585846, "epoch": 7.8, "learning_rate": 1.2238189161266085e-05, "loss": 0.4995, "step": 9224, "task_loss": 0.643604576587677 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4923507273197174, "epoch": 7.8, "learning_rate": 1.2233493002723773e-05, "loss": 0.6384, "step": 9225, "task_loss": 0.7829862236976624 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5855726003646851, "epoch": 7.8, "learning_rate": 1.222879684418146e-05, "loss": 0.8244, "step": 9226, "task_loss": 1.1817216873168945 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6873863339424133, "epoch": 7.8, "learning_rate": 1.2224100685639148e-05, "loss": 0.6626, "step": 9227, "task_loss": 1.2474788427352905 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.3575519919395447, "epoch": 7.8, "learning_rate": 1.2219404527096836e-05, "loss": 0.4941, "step": 9228, "task_loss": 0.30007925629615784 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.9238381385803223, "epoch": 7.8, "learning_rate": 1.2214708368554522e-05, "loss": 0.6408, "step": 9229, "task_loss": 0.8237901926040649 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.3202037811279297, "epoch": 7.8, "learning_rate": 1.221001221001221e-05, "loss": 0.5442, "step": 9230, "task_loss": 0.8448647260665894 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.49808260798454285, "epoch": 7.8, "learning_rate": 1.2205316051469898e-05, "loss": 0.5241, "step": 9231, "task_loss": 0.41476675868034363 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6203761100769043, "epoch": 7.8, "learning_rate": 1.2200619892927586e-05, "loss": 0.5033, "step": 9232, "task_loss": 0.7833760976791382 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7851611375808716, "epoch": 7.8, "learning_rate": 1.2195923734385272e-05, "loss": 0.6598, "step": 9233, "task_loss": 1.068286418914795 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.8005359768867493, "epoch": 7.81, "learning_rate": 1.2191227575842962e-05, "loss": 0.6553, "step": 9234, "task_loss": 0.8778249621391296 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6215698719024658, "epoch": 7.81, "learning_rate": 1.2186531417300648e-05, "loss": 0.6302, "step": 9235, "task_loss": 0.5525217056274414 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6312036514282227, "epoch": 7.81, "learning_rate": 1.2181835258758337e-05, "loss": 0.5289, "step": 9236, "task_loss": 0.8525466918945312 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6585193872451782, "epoch": 7.81, "learning_rate": 1.2177139100216025e-05, "loss": 0.6005, "step": 9237, "task_loss": 1.166562557220459 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.8315794467926025, "epoch": 7.81, "learning_rate": 1.2172442941673713e-05, "loss": 0.6606, "step": 9238, "task_loss": 0.45985886454582214 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7767733931541443, "epoch": 7.81, "learning_rate": 1.2167746783131399e-05, "loss": 0.4407, "step": 9239, "task_loss": 0.7310854196548462 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4326306879520416, "epoch": 7.81, "learning_rate": 1.2163050624589087e-05, "loss": 0.6789, "step": 9240, "task_loss": 0.13972207903862 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5320179462432861, "epoch": 7.81, "learning_rate": 1.2158354466046775e-05, "loss": 0.5768, "step": 9241, "task_loss": 0.4122759699821472 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6065739393234253, "epoch": 7.81, "learning_rate": 1.2153658307504461e-05, "loss": 0.5502, "step": 9242, "task_loss": 0.8673749566078186 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6351373195648193, "epoch": 7.81, "learning_rate": 1.214896214896215e-05, "loss": 0.7261, "step": 9243, "task_loss": 0.6195255517959595 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4116397500038147, "epoch": 7.81, "learning_rate": 1.2144265990419837e-05, "loss": 0.4012, "step": 9244, "task_loss": 1.0053728818893433 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6081851720809937, "epoch": 7.81, "learning_rate": 1.2139569831877524e-05, "loss": 0.7202, "step": 9245, "task_loss": 1.4444200992584229 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4677433967590332, "epoch": 7.82, "learning_rate": 1.2134873673335212e-05, "loss": 0.6233, "step": 9246, "task_loss": 0.40733927488327026 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6671141386032104, "epoch": 7.82, "learning_rate": 1.21301775147929e-05, "loss": 0.5914, "step": 9247, "task_loss": 0.8897861242294312 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.880736231803894, "epoch": 7.82, "learning_rate": 1.2125481356250588e-05, "loss": 0.7564, "step": 9248, "task_loss": 1.4604625701904297 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.48671454191207886, "epoch": 7.82, "learning_rate": 1.2120785197708274e-05, "loss": 0.4974, "step": 9249, "task_loss": 0.7679075598716736 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.41997334361076355, "epoch": 7.82, "learning_rate": 1.2116089039165964e-05, "loss": 0.5089, "step": 9250, "task_loss": 0.5197562575340271 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6217628121376038, "epoch": 7.82, "learning_rate": 1.211139288062365e-05, "loss": 0.6808, "step": 9251, "task_loss": 1.4101309776306152 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6243614554405212, "epoch": 7.82, "learning_rate": 1.2106696722081338e-05, "loss": 0.6501, "step": 9252, "task_loss": 1.6311492919921875 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7083710432052612, "epoch": 7.82, "learning_rate": 1.2102000563539026e-05, "loss": 0.7277, "step": 9253, "task_loss": 0.5568885207176208 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5752865076065063, "epoch": 7.82, "learning_rate": 1.2097304404996714e-05, "loss": 0.6648, "step": 9254, "task_loss": 0.4532527029514313 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.8630462288856506, "epoch": 7.82, "learning_rate": 1.20926082464544e-05, "loss": 0.7515, "step": 9255, "task_loss": 1.0270709991455078 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5599292516708374, "epoch": 7.82, "learning_rate": 1.2087912087912089e-05, "loss": 0.5382, "step": 9256, "task_loss": 0.7215776443481445 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.33189085125923157, "epoch": 7.82, "learning_rate": 1.2083215929369777e-05, "loss": 0.4737, "step": 9257, "task_loss": 0.7015829682350159 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.502589225769043, "epoch": 7.83, "learning_rate": 1.2078519770827463e-05, "loss": 0.463, "step": 9258, "task_loss": 0.3994985818862915 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.010279655456543, "epoch": 7.83, "learning_rate": 1.2073823612285151e-05, "loss": 0.7966, "step": 9259, "task_loss": 1.2055457830429077 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.8681115508079529, "epoch": 7.83, "learning_rate": 1.206912745374284e-05, "loss": 0.6622, "step": 9260, "task_loss": 1.4711294174194336 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7401294708251953, "epoch": 7.83, "learning_rate": 1.2064431295200525e-05, "loss": 0.6582, "step": 9261, "task_loss": 1.0682728290557861 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.2743995189666748, "epoch": 7.83, "learning_rate": 1.2059735136658214e-05, "loss": 0.7289, "step": 9262, "task_loss": 0.09796668589115143 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.596108078956604, "epoch": 7.83, "learning_rate": 1.2055038978115902e-05, "loss": 0.4158, "step": 9263, "task_loss": 0.6065584421157837 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.8598300814628601, "epoch": 7.83, "learning_rate": 1.2050342819573588e-05, "loss": 0.5242, "step": 9264, "task_loss": 1.0548279285430908 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.604052722454071, "epoch": 7.83, "learning_rate": 1.2045646661031278e-05, "loss": 0.5993, "step": 9265, "task_loss": 0.30261075496673584 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4653339982032776, "epoch": 7.83, "learning_rate": 1.2040950502488966e-05, "loss": 0.7732, "step": 9266, "task_loss": 0.20248746871948242 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.8494768142700195, "epoch": 7.83, "learning_rate": 1.2036254343946652e-05, "loss": 0.6431, "step": 9267, "task_loss": 1.488345980644226 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.478204607963562, "epoch": 7.83, "learning_rate": 1.203155818540434e-05, "loss": 0.6697, "step": 9268, "task_loss": 1.0240557193756104 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.37322986125946045, "epoch": 7.83, "learning_rate": 1.2026862026862028e-05, "loss": 0.5097, "step": 9269, "task_loss": 0.47234973311424255 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5139741897583008, "epoch": 7.84, "learning_rate": 1.2022165868319714e-05, "loss": 0.5897, "step": 9270, "task_loss": 0.07266499102115631 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5163543224334717, "epoch": 7.84, "learning_rate": 1.2017469709777402e-05, "loss": 0.6082, "step": 9271, "task_loss": 0.35763078927993774 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.8312714695930481, "epoch": 7.84, "learning_rate": 1.201277355123509e-05, "loss": 0.7521, "step": 9272, "task_loss": 0.8444193005561829 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6552348136901855, "epoch": 7.84, "learning_rate": 1.2008077392692779e-05, "loss": 0.6107, "step": 9273, "task_loss": 1.1751883029937744 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.38688188791275024, "epoch": 7.84, "learning_rate": 1.2003381234150465e-05, "loss": 0.4258, "step": 9274, "task_loss": 0.9498867988586426 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.8224754333496094, "epoch": 7.84, "learning_rate": 1.1998685075608153e-05, "loss": 0.6323, "step": 9275, "task_loss": 1.7587980031967163 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5195527672767639, "epoch": 7.84, "learning_rate": 1.1993988917065841e-05, "loss": 0.6414, "step": 9276, "task_loss": 0.705010712146759 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6050170063972473, "epoch": 7.84, "learning_rate": 1.1989292758523527e-05, "loss": 0.4825, "step": 9277, "task_loss": 0.8411605358123779 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.3979472815990448, "epoch": 7.84, "learning_rate": 1.1984596599981215e-05, "loss": 0.5386, "step": 9278, "task_loss": 0.5219746828079224 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.2698785662651062, "epoch": 7.84, "learning_rate": 1.1979900441438903e-05, "loss": 0.4869, "step": 9279, "task_loss": 0.09238096326589584 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5532017350196838, "epoch": 7.84, "learning_rate": 1.1975204282896591e-05, "loss": 0.5909, "step": 9280, "task_loss": 0.26685094833374023 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7017921805381775, "epoch": 7.84, "learning_rate": 1.197050812435428e-05, "loss": 0.5401, "step": 9281, "task_loss": 0.7231501936912537 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6045128703117371, "epoch": 7.85, "learning_rate": 1.1965811965811967e-05, "loss": 0.6947, "step": 9282, "task_loss": 0.5414072871208191 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4864766597747803, "epoch": 7.85, "learning_rate": 1.1961115807269654e-05, "loss": 0.4459, "step": 9283, "task_loss": 1.333050012588501 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7981334924697876, "epoch": 7.85, "learning_rate": 1.1956419648727342e-05, "loss": 0.6235, "step": 9284, "task_loss": 0.6602625250816345 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.760779619216919, "epoch": 7.85, "learning_rate": 1.195172349018503e-05, "loss": 0.6774, "step": 9285, "task_loss": 0.7662138938903809 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.8918028473854065, "epoch": 7.85, "learning_rate": 1.1947027331642716e-05, "loss": 0.7905, "step": 9286, "task_loss": 0.8125105500221252 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4679436981678009, "epoch": 7.85, "learning_rate": 1.1942331173100404e-05, "loss": 0.5304, "step": 9287, "task_loss": 0.6071472764015198 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.49994805455207825, "epoch": 7.85, "learning_rate": 1.1937635014558092e-05, "loss": 0.5519, "step": 9288, "task_loss": 1.2786537408828735 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5234246850013733, "epoch": 7.85, "learning_rate": 1.193293885601578e-05, "loss": 0.5673, "step": 9289, "task_loss": 0.4634457528591156 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.49539893865585327, "epoch": 7.85, "learning_rate": 1.1928242697473467e-05, "loss": 0.5259, "step": 9290, "task_loss": 0.938583493232727 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.45192965865135193, "epoch": 7.85, "learning_rate": 1.1923546538931155e-05, "loss": 0.4154, "step": 9291, "task_loss": 0.4525604844093323 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.66815185546875, "epoch": 7.85, "learning_rate": 1.1918850380388843e-05, "loss": 0.5826, "step": 9292, "task_loss": 1.1670300960540771 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6612383723258972, "epoch": 7.85, "learning_rate": 1.1914154221846529e-05, "loss": 0.564, "step": 9293, "task_loss": 0.3871161639690399 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.44947350025177, "epoch": 7.86, "learning_rate": 1.1909458063304217e-05, "loss": 0.5045, "step": 9294, "task_loss": 0.7586336135864258 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.50494384765625, "epoch": 7.86, "learning_rate": 1.1904761904761905e-05, "loss": 0.5486, "step": 9295, "task_loss": 0.4372828006744385 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5112669467926025, "epoch": 7.86, "learning_rate": 1.1900065746219593e-05, "loss": 0.5172, "step": 9296, "task_loss": 0.48432207107543945 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7695194482803345, "epoch": 7.86, "learning_rate": 1.1895369587677281e-05, "loss": 0.71, "step": 9297, "task_loss": 0.7044280767440796 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.9119033813476562, "epoch": 7.86, "learning_rate": 1.189067342913497e-05, "loss": 0.6785, "step": 9298, "task_loss": 0.6933794617652893 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.8601858019828796, "epoch": 7.86, "learning_rate": 1.1885977270592656e-05, "loss": 0.699, "step": 9299, "task_loss": 1.2532027959823608 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7136693596839905, "epoch": 7.86, "learning_rate": 1.1881281112050344e-05, "loss": 0.6999, "step": 9300, "task_loss": 1.7915784120559692 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5366383790969849, "epoch": 7.86, "learning_rate": 1.1876584953508032e-05, "loss": 0.5622, "step": 9301, "task_loss": 0.46840882301330566 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.568282425403595, "epoch": 7.86, "learning_rate": 1.1871888794965718e-05, "loss": 0.5758, "step": 9302, "task_loss": 1.2255793809890747 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5616037845611572, "epoch": 7.86, "learning_rate": 1.1867192636423406e-05, "loss": 0.506, "step": 9303, "task_loss": 0.5363072752952576 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4846736490726471, "epoch": 7.86, "learning_rate": 1.1862496477881094e-05, "loss": 0.63, "step": 9304, "task_loss": 1.1016818284988403 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6819823980331421, "epoch": 7.87, "learning_rate": 1.1857800319338782e-05, "loss": 0.6388, "step": 9305, "task_loss": 0.6065698266029358 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7092506289482117, "epoch": 7.87, "learning_rate": 1.1853104160796468e-05, "loss": 0.64, "step": 9306, "task_loss": 1.0774624347686768 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.9463523626327515, "epoch": 7.87, "learning_rate": 1.1848408002254156e-05, "loss": 0.7726, "step": 9307, "task_loss": 1.0818854570388794 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5058406591415405, "epoch": 7.87, "learning_rate": 1.1843711843711844e-05, "loss": 0.5991, "step": 9308, "task_loss": 0.598130464553833 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.8943616151809692, "epoch": 7.87, "learning_rate": 1.183901568516953e-05, "loss": 0.8202, "step": 9309, "task_loss": 0.7584429979324341 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.380227267742157, "epoch": 7.87, "learning_rate": 1.1834319526627219e-05, "loss": 0.5901, "step": 9310, "task_loss": 0.9526321291923523 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6819581985473633, "epoch": 7.87, "learning_rate": 1.1829623368084909e-05, "loss": 0.7759, "step": 9311, "task_loss": 1.0904330015182495 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6405022144317627, "epoch": 7.87, "learning_rate": 1.1824927209542595e-05, "loss": 0.7902, "step": 9312, "task_loss": 0.9204011559486389 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.521255612373352, "epoch": 7.87, "learning_rate": 1.1820231051000283e-05, "loss": 0.6348, "step": 9313, "task_loss": 0.7240114808082581 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.490625262260437, "epoch": 7.87, "learning_rate": 1.1815534892457971e-05, "loss": 0.7213, "step": 9314, "task_loss": 0.6157011389732361 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.38426339626312256, "epoch": 7.87, "learning_rate": 1.1810838733915657e-05, "loss": 0.6538, "step": 9315, "task_loss": 0.8507285118103027 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.3548632264137268, "epoch": 7.87, "learning_rate": 1.1806142575373345e-05, "loss": 0.5257, "step": 9316, "task_loss": 0.371065229177475 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.0167105197906494, "epoch": 7.88, "learning_rate": 1.1801446416831033e-05, "loss": 0.7179, "step": 9317, "task_loss": 1.4678363800048828 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.687595784664154, "epoch": 7.88, "learning_rate": 1.179675025828872e-05, "loss": 0.7365, "step": 9318, "task_loss": 0.7651093006134033 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6699698567390442, "epoch": 7.88, "learning_rate": 1.1792054099746408e-05, "loss": 0.7491, "step": 9319, "task_loss": 0.6311319470405579 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.34880444407463074, "epoch": 7.88, "learning_rate": 1.1787357941204096e-05, "loss": 0.6028, "step": 9320, "task_loss": 0.5314533710479736 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5252175331115723, "epoch": 7.88, "learning_rate": 1.1782661782661784e-05, "loss": 0.5801, "step": 9321, "task_loss": 0.8142345547676086 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.9274237155914307, "epoch": 7.88, "learning_rate": 1.177796562411947e-05, "loss": 0.6422, "step": 9322, "task_loss": 0.4044482111930847 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.3893706202507019, "epoch": 7.88, "learning_rate": 1.1773269465577158e-05, "loss": 0.5816, "step": 9323, "task_loss": 0.5028839111328125 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6463404297828674, "epoch": 7.88, "learning_rate": 1.1768573307034846e-05, "loss": 0.5668, "step": 9324, "task_loss": 1.439249873161316 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.2882609963417053, "epoch": 7.88, "learning_rate": 1.1763877148492533e-05, "loss": 0.5096, "step": 9325, "task_loss": 0.9778980016708374 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.9949068427085876, "epoch": 7.88, "learning_rate": 1.175918098995022e-05, "loss": 0.7547, "step": 9326, "task_loss": 1.558409571647644 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5672178864479065, "epoch": 7.88, "learning_rate": 1.175448483140791e-05, "loss": 0.683, "step": 9327, "task_loss": 0.4719322621822357 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7025935053825378, "epoch": 7.88, "learning_rate": 1.1749788672865597e-05, "loss": 0.6788, "step": 9328, "task_loss": 1.3948453664779663 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4702918529510498, "epoch": 7.89, "learning_rate": 1.1745092514323285e-05, "loss": 0.5737, "step": 9329, "task_loss": 1.1162681579589844 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.3098708987236023, "epoch": 7.89, "learning_rate": 1.1740396355780973e-05, "loss": 0.5893, "step": 9330, "task_loss": 0.4862585961818695 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6313767433166504, "epoch": 7.89, "learning_rate": 1.1735700197238659e-05, "loss": 0.5906, "step": 9331, "task_loss": 0.3238650858402252 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5914211869239807, "epoch": 7.89, "learning_rate": 1.1731004038696347e-05, "loss": 0.794, "step": 9332, "task_loss": 0.7812368869781494 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6282732486724854, "epoch": 7.89, "learning_rate": 1.1726307880154035e-05, "loss": 0.4491, "step": 9333, "task_loss": 1.3304002285003662 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6539148092269897, "epoch": 7.89, "learning_rate": 1.1721611721611721e-05, "loss": 0.6382, "step": 9334, "task_loss": 0.5356806516647339 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.3320097029209137, "epoch": 7.89, "learning_rate": 1.171691556306941e-05, "loss": 0.59, "step": 9335, "task_loss": 0.6513996124267578 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.9658387899398804, "epoch": 7.89, "learning_rate": 1.1712219404527098e-05, "loss": 0.7695, "step": 9336, "task_loss": 1.1512246131896973 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.22772479057312012, "epoch": 7.89, "learning_rate": 1.1707523245984786e-05, "loss": 0.5025, "step": 9337, "task_loss": 0.341417521238327 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6229044198989868, "epoch": 7.89, "learning_rate": 1.1702827087442472e-05, "loss": 0.7288, "step": 9338, "task_loss": 1.080311894416809 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.9156361818313599, "epoch": 7.89, "learning_rate": 1.169813092890016e-05, "loss": 0.7711, "step": 9339, "task_loss": 1.792427659034729 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4877018928527832, "epoch": 7.89, "learning_rate": 1.1693434770357848e-05, "loss": 0.4946, "step": 9340, "task_loss": 0.690035343170166 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.3781812787055969, "epoch": 7.9, "learning_rate": 1.1688738611815534e-05, "loss": 0.4752, "step": 9341, "task_loss": 0.04009866714477539 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.3604905605316162, "epoch": 7.9, "learning_rate": 1.1684042453273224e-05, "loss": 0.5822, "step": 9342, "task_loss": 0.21230128407478333 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.48242148756980896, "epoch": 7.9, "learning_rate": 1.1679346294730912e-05, "loss": 0.6545, "step": 9343, "task_loss": 0.9765806198120117 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6387937664985657, "epoch": 7.9, "learning_rate": 1.1674650136188598e-05, "loss": 0.7311, "step": 9344, "task_loss": 0.7566108107566833 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6157197952270508, "epoch": 7.9, "learning_rate": 1.1669953977646286e-05, "loss": 0.6148, "step": 9345, "task_loss": 0.24975921213626862 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6847743988037109, "epoch": 7.9, "learning_rate": 1.1665257819103974e-05, "loss": 0.6077, "step": 9346, "task_loss": 1.2689799070358276 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.40344005823135376, "epoch": 7.9, "learning_rate": 1.166056166056166e-05, "loss": 0.4981, "step": 9347, "task_loss": 0.3088672459125519 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7288113832473755, "epoch": 7.9, "learning_rate": 1.1655865502019349e-05, "loss": 0.6156, "step": 9348, "task_loss": 0.571753978729248 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7345532178878784, "epoch": 7.9, "learning_rate": 1.1651169343477037e-05, "loss": 0.6343, "step": 9349, "task_loss": 0.8760285973548889 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.45350104570388794, "epoch": 7.9, "learning_rate": 1.1646473184934723e-05, "loss": 0.6294, "step": 9350, "task_loss": 0.6747958660125732 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.3357434868812561, "epoch": 7.9, "learning_rate": 1.1641777026392411e-05, "loss": 0.5282, "step": 9351, "task_loss": 0.089142806828022 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6920523643493652, "epoch": 7.9, "learning_rate": 1.16370808678501e-05, "loss": 0.5854, "step": 9352, "task_loss": 0.7401637434959412 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.8352468609809875, "epoch": 7.91, "learning_rate": 1.1632384709307786e-05, "loss": 0.5755, "step": 9353, "task_loss": 0.8834419846534729 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5791182518005371, "epoch": 7.91, "learning_rate": 1.1627688550765474e-05, "loss": 0.6237, "step": 9354, "task_loss": 0.8880972862243652 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.3421515226364136, "epoch": 7.91, "learning_rate": 1.1622992392223162e-05, "loss": 0.5417, "step": 9355, "task_loss": 0.424744188785553 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5214434862136841, "epoch": 7.91, "learning_rate": 1.161829623368085e-05, "loss": 0.7062, "step": 9356, "task_loss": 0.5954174995422363 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5210024118423462, "epoch": 7.91, "learning_rate": 1.1613600075138538e-05, "loss": 0.6194, "step": 9357, "task_loss": 0.7415909171104431 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6676424741744995, "epoch": 7.91, "learning_rate": 1.1608903916596226e-05, "loss": 0.6732, "step": 9358, "task_loss": 0.5274196863174438 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.8020111322402954, "epoch": 7.91, "learning_rate": 1.1604207758053912e-05, "loss": 0.6354, "step": 9359, "task_loss": 0.14721661806106567 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.3807380795478821, "epoch": 7.91, "learning_rate": 1.15995115995116e-05, "loss": 0.4464, "step": 9360, "task_loss": 0.637386679649353 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.922400176525116, "epoch": 7.91, "learning_rate": 1.1594815440969288e-05, "loss": 0.6942, "step": 9361, "task_loss": 1.6940990686416626 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.39947760105133057, "epoch": 7.91, "learning_rate": 1.1590119282426976e-05, "loss": 0.593, "step": 9362, "task_loss": 0.5549744367599487 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.3199881315231323, "epoch": 7.91, "learning_rate": 1.1585423123884663e-05, "loss": 0.4834, "step": 9363, "task_loss": 0.2715682089328766 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6301392316818237, "epoch": 7.91, "learning_rate": 1.158072696534235e-05, "loss": 0.6499, "step": 9364, "task_loss": 0.257243812084198 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7888371348381042, "epoch": 7.92, "learning_rate": 1.1576030806800039e-05, "loss": 0.5809, "step": 9365, "task_loss": 0.987948477268219 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.8416552543640137, "epoch": 7.92, "learning_rate": 1.1571334648257725e-05, "loss": 0.4506, "step": 9366, "task_loss": 0.6163206100463867 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6055905818939209, "epoch": 7.92, "learning_rate": 1.1566638489715413e-05, "loss": 0.5845, "step": 9367, "task_loss": 0.5403327941894531 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4669856131076813, "epoch": 7.92, "learning_rate": 1.1561942331173101e-05, "loss": 0.6346, "step": 9368, "task_loss": 0.7165498733520508 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6917380094528198, "epoch": 7.92, "learning_rate": 1.1557246172630787e-05, "loss": 0.6259, "step": 9369, "task_loss": 1.0885255336761475 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5442982316017151, "epoch": 7.92, "learning_rate": 1.1552550014088475e-05, "loss": 0.602, "step": 9370, "task_loss": 0.3081853687763214 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4410777688026428, "epoch": 7.92, "learning_rate": 1.1547853855546163e-05, "loss": 0.5903, "step": 9371, "task_loss": 0.9331405758857727 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4594109058380127, "epoch": 7.92, "learning_rate": 1.1543157697003852e-05, "loss": 0.6657, "step": 9372, "task_loss": 0.48760098218917847 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5494014620780945, "epoch": 7.92, "learning_rate": 1.153846153846154e-05, "loss": 0.7147, "step": 9373, "task_loss": 1.1481456756591797 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.791900634765625, "epoch": 7.92, "learning_rate": 1.1533765379919228e-05, "loss": 0.5603, "step": 9374, "task_loss": 1.0063822269439697 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5902163982391357, "epoch": 7.92, "learning_rate": 1.1529069221376914e-05, "loss": 0.8235, "step": 9375, "task_loss": 1.0874119997024536 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6441088914871216, "epoch": 7.93, "learning_rate": 1.1524373062834602e-05, "loss": 0.7089, "step": 9376, "task_loss": 1.329241394996643 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.41410893201828003, "epoch": 7.93, "learning_rate": 1.151967690429229e-05, "loss": 0.7748, "step": 9377, "task_loss": 0.3507739305496216 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.8321547508239746, "epoch": 7.93, "learning_rate": 1.1514980745749978e-05, "loss": 0.6307, "step": 9378, "task_loss": 1.0516330003738403 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5715767741203308, "epoch": 7.93, "learning_rate": 1.1510284587207664e-05, "loss": 0.6346, "step": 9379, "task_loss": 1.2552374601364136 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5960497856140137, "epoch": 7.93, "learning_rate": 1.1505588428665352e-05, "loss": 0.562, "step": 9380, "task_loss": 1.1063677072525024 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5169216394424438, "epoch": 7.93, "learning_rate": 1.150089227012304e-05, "loss": 0.5013, "step": 9381, "task_loss": 0.05260728299617767 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.42139527201652527, "epoch": 7.93, "learning_rate": 1.1496196111580727e-05, "loss": 0.4781, "step": 9382, "task_loss": 0.3015449047088623 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.8397413492202759, "epoch": 7.93, "learning_rate": 1.1491499953038415e-05, "loss": 0.7093, "step": 9383, "task_loss": 0.820940375328064 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.632329523563385, "epoch": 7.93, "learning_rate": 1.1486803794496103e-05, "loss": 0.602, "step": 9384, "task_loss": 0.8939549326896667 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.403773695230484, "epoch": 7.93, "learning_rate": 1.148210763595379e-05, "loss": 0.5262, "step": 9385, "task_loss": 0.7638610005378723 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.41257116198539734, "epoch": 7.93, "learning_rate": 1.1477411477411477e-05, "loss": 0.5895, "step": 9386, "task_loss": 1.3344825506210327 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.3829728960990906, "epoch": 7.93, "learning_rate": 1.1472715318869165e-05, "loss": 0.5192, "step": 9387, "task_loss": 0.5396255850791931 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.46532174944877625, "epoch": 7.94, "learning_rate": 1.1468019160326853e-05, "loss": 0.606, "step": 9388, "task_loss": 1.5290993452072144 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5130600333213806, "epoch": 7.94, "learning_rate": 1.1463323001784541e-05, "loss": 0.4386, "step": 9389, "task_loss": 0.2596134841442108 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.9112262725830078, "epoch": 7.94, "learning_rate": 1.145862684324223e-05, "loss": 0.5762, "step": 9390, "task_loss": 0.5521166324615479 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5623219609260559, "epoch": 7.94, "learning_rate": 1.1453930684699916e-05, "loss": 0.5641, "step": 9391, "task_loss": 1.2574126720428467 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.3407719135284424, "epoch": 7.94, "learning_rate": 1.1449234526157604e-05, "loss": 0.6748, "step": 9392, "task_loss": 0.33723169565200806 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5623328685760498, "epoch": 7.94, "learning_rate": 1.1444538367615292e-05, "loss": 0.5412, "step": 9393, "task_loss": 0.6533262729644775 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.3463544249534607, "epoch": 7.94, "learning_rate": 1.143984220907298e-05, "loss": 0.5566, "step": 9394, "task_loss": 0.07804758101701736 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.1490049362182617, "epoch": 7.94, "learning_rate": 1.1435146050530666e-05, "loss": 0.7712, "step": 9395, "task_loss": 1.5012118816375732 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5321285724639893, "epoch": 7.94, "learning_rate": 1.1430449891988354e-05, "loss": 0.534, "step": 9396, "task_loss": 1.1554899215698242 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.615803062915802, "epoch": 7.94, "learning_rate": 1.1425753733446042e-05, "loss": 0.5118, "step": 9397, "task_loss": 0.6349579691886902 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.65102219581604, "epoch": 7.94, "learning_rate": 1.1421057574903729e-05, "loss": 0.7144, "step": 9398, "task_loss": 2.310368776321411 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6885597705841064, "epoch": 7.94, "learning_rate": 1.1416361416361417e-05, "loss": 0.6439, "step": 9399, "task_loss": 0.5299830436706543 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6956585645675659, "epoch": 7.95, "learning_rate": 1.1411665257819105e-05, "loss": 0.6378, "step": 9400, "task_loss": 1.042489767074585 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.23922953009605408, "epoch": 7.95, "learning_rate": 1.1406969099276791e-05, "loss": 0.4312, "step": 9401, "task_loss": 0.23245881497859955 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4080907106399536, "epoch": 7.95, "learning_rate": 1.1402272940734479e-05, "loss": 0.6091, "step": 9402, "task_loss": 1.4790080785751343 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6414295434951782, "epoch": 7.95, "learning_rate": 1.1397576782192167e-05, "loss": 0.7335, "step": 9403, "task_loss": 0.5174880623817444 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6065413951873779, "epoch": 7.95, "learning_rate": 1.1392880623649855e-05, "loss": 0.5987, "step": 9404, "task_loss": 0.7833905816078186 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.3254406154155731, "epoch": 7.95, "learning_rate": 1.1388184465107543e-05, "loss": 0.6456, "step": 9405, "task_loss": 0.5428582429885864 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7399395704269409, "epoch": 7.95, "learning_rate": 1.1383488306565231e-05, "loss": 0.7147, "step": 9406, "task_loss": 0.567611813545227 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.368856281042099, "epoch": 7.95, "learning_rate": 1.1378792148022917e-05, "loss": 0.6076, "step": 9407, "task_loss": 0.36072567105293274 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4096352159976959, "epoch": 7.95, "learning_rate": 1.1374095989480605e-05, "loss": 0.5723, "step": 9408, "task_loss": 0.3181588649749756 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5841010808944702, "epoch": 7.95, "learning_rate": 1.1369399830938294e-05, "loss": 0.5614, "step": 9409, "task_loss": 0.4596448838710785 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.3662697970867157, "epoch": 7.95, "learning_rate": 1.1364703672395982e-05, "loss": 0.5684, "step": 9410, "task_loss": 0.47355592250823975 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5822714567184448, "epoch": 7.95, "learning_rate": 1.1360007513853668e-05, "loss": 0.48, "step": 9411, "task_loss": 0.30053797364234924 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5498231053352356, "epoch": 7.96, "learning_rate": 1.1355311355311356e-05, "loss": 0.6215, "step": 9412, "task_loss": 0.8459458351135254 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.2722030282020569, "epoch": 7.96, "learning_rate": 1.1350615196769044e-05, "loss": 0.4228, "step": 9413, "task_loss": 0.3971829414367676 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6278061270713806, "epoch": 7.96, "learning_rate": 1.134591903822673e-05, "loss": 0.7216, "step": 9414, "task_loss": 0.3004898428916931 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4071553647518158, "epoch": 7.96, "learning_rate": 1.1341222879684418e-05, "loss": 0.6367, "step": 9415, "task_loss": 1.531945824623108 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.48915520310401917, "epoch": 7.96, "learning_rate": 1.1336526721142106e-05, "loss": 0.5535, "step": 9416, "task_loss": 0.45885297656059265 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6418141722679138, "epoch": 7.96, "learning_rate": 1.1331830562599793e-05, "loss": 0.5335, "step": 9417, "task_loss": 0.2028040885925293 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.558367133140564, "epoch": 7.96, "learning_rate": 1.132713440405748e-05, "loss": 0.6851, "step": 9418, "task_loss": 0.893747091293335 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.0176935195922852, "epoch": 7.96, "learning_rate": 1.132243824551517e-05, "loss": 0.7962, "step": 9419, "task_loss": 1.1557059288024902 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5256329774856567, "epoch": 7.96, "learning_rate": 1.1317742086972857e-05, "loss": 0.5362, "step": 9420, "task_loss": 0.5842135548591614 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5086922645568848, "epoch": 7.96, "learning_rate": 1.1313045928430545e-05, "loss": 0.5344, "step": 9421, "task_loss": 0.28273290395736694 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.572037935256958, "epoch": 7.96, "learning_rate": 1.1308349769888233e-05, "loss": 0.8839, "step": 9422, "task_loss": 1.2476005554199219 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7086217403411865, "epoch": 7.96, "learning_rate": 1.130365361134592e-05, "loss": 0.5545, "step": 9423, "task_loss": 1.0698505640029907 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6112680435180664, "epoch": 7.97, "learning_rate": 1.1298957452803607e-05, "loss": 0.6292, "step": 9424, "task_loss": 0.6469340920448303 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.0409263372421265, "epoch": 7.97, "learning_rate": 1.1294261294261295e-05, "loss": 0.6465, "step": 9425, "task_loss": 1.114302396774292 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4348999261856079, "epoch": 7.97, "learning_rate": 1.1289565135718982e-05, "loss": 0.5127, "step": 9426, "task_loss": 0.338985413312912 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7122441530227661, "epoch": 7.97, "learning_rate": 1.128486897717667e-05, "loss": 0.762, "step": 9427, "task_loss": 0.9999758005142212 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.8094573020935059, "epoch": 7.97, "learning_rate": 1.1280172818634358e-05, "loss": 0.7401, "step": 9428, "task_loss": 0.7292618155479431 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4456721544265747, "epoch": 7.97, "learning_rate": 1.1275476660092046e-05, "loss": 0.5867, "step": 9429, "task_loss": 0.35794416069984436 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7528291940689087, "epoch": 7.97, "learning_rate": 1.1270780501549732e-05, "loss": 0.5665, "step": 9430, "task_loss": 1.2289931774139404 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.44323283433914185, "epoch": 7.97, "learning_rate": 1.126608434300742e-05, "loss": 0.6618, "step": 9431, "task_loss": 0.07240094989538193 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7988613247871399, "epoch": 7.97, "learning_rate": 1.1261388184465108e-05, "loss": 0.5656, "step": 9432, "task_loss": 0.245982825756073 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.042212963104248, "epoch": 7.97, "learning_rate": 1.1256692025922794e-05, "loss": 0.8128, "step": 9433, "task_loss": 1.2321810722351074 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5708528757095337, "epoch": 7.97, "learning_rate": 1.1251995867380482e-05, "loss": 0.5911, "step": 9434, "task_loss": 0.7063968181610107 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.405477911233902, "epoch": 7.97, "learning_rate": 1.1247299708838172e-05, "loss": 0.7664, "step": 9435, "task_loss": 0.0916319265961647 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6326366066932678, "epoch": 7.98, "learning_rate": 1.1242603550295859e-05, "loss": 0.8127, "step": 9436, "task_loss": 0.8660032153129578 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.9162658452987671, "epoch": 7.98, "learning_rate": 1.1237907391753547e-05, "loss": 0.7986, "step": 9437, "task_loss": 0.9233136773109436 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4461521804332733, "epoch": 7.98, "learning_rate": 1.1233211233211235e-05, "loss": 0.4778, "step": 9438, "task_loss": 0.6265773773193359 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4681202173233032, "epoch": 7.98, "learning_rate": 1.1228515074668921e-05, "loss": 0.5978, "step": 9439, "task_loss": 0.5657489895820618 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6269662380218506, "epoch": 7.98, "learning_rate": 1.1223818916126609e-05, "loss": 0.5584, "step": 9440, "task_loss": 1.0935978889465332 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.40904706716537476, "epoch": 7.98, "learning_rate": 1.1219122757584297e-05, "loss": 0.4613, "step": 9441, "task_loss": 0.4207878112792969 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.39742425084114075, "epoch": 7.98, "learning_rate": 1.1214426599041983e-05, "loss": 0.6752, "step": 9442, "task_loss": 1.166953444480896 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6442578434944153, "epoch": 7.98, "learning_rate": 1.1209730440499671e-05, "loss": 0.699, "step": 9443, "task_loss": 1.2483375072479248 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.3225404620170593, "epoch": 7.98, "learning_rate": 1.120503428195736e-05, "loss": 0.4682, "step": 9444, "task_loss": 0.07836078107357025 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.3078013062477112, "epoch": 7.98, "learning_rate": 1.1200338123415047e-05, "loss": 0.57, "step": 9445, "task_loss": 0.7894362807273865 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7230902910232544, "epoch": 7.98, "learning_rate": 1.1195641964872734e-05, "loss": 0.533, "step": 9446, "task_loss": 1.3660478591918945 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.709510087966919, "epoch": 7.99, "learning_rate": 1.1190945806330422e-05, "loss": 0.5465, "step": 9447, "task_loss": 0.7404565811157227 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5552453994750977, "epoch": 7.99, "learning_rate": 1.118624964778811e-05, "loss": 0.7681, "step": 9448, "task_loss": 0.6044657230377197 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7909207940101624, "epoch": 7.99, "learning_rate": 1.1181553489245796e-05, "loss": 0.7089, "step": 9449, "task_loss": 1.0200181007385254 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5152877569198608, "epoch": 7.99, "learning_rate": 1.1176857330703486e-05, "loss": 0.5334, "step": 9450, "task_loss": 0.10435809940099716 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5352990627288818, "epoch": 7.99, "learning_rate": 1.1172161172161174e-05, "loss": 0.5934, "step": 9451, "task_loss": 1.231873869895935 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.45269647240638733, "epoch": 7.99, "learning_rate": 1.116746501361886e-05, "loss": 0.5244, "step": 9452, "task_loss": 0.865516722202301 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.45614904165267944, "epoch": 7.99, "learning_rate": 1.1162768855076548e-05, "loss": 0.509, "step": 9453, "task_loss": 0.7096452713012695 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6409111022949219, "epoch": 7.99, "learning_rate": 1.1158072696534236e-05, "loss": 0.5441, "step": 9454, "task_loss": 0.9649538993835449 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.48468875885009766, "epoch": 7.99, "learning_rate": 1.1153376537991923e-05, "loss": 0.5363, "step": 9455, "task_loss": 1.204743504524231 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.526914119720459, "epoch": 7.99, "learning_rate": 1.114868037944961e-05, "loss": 0.6603, "step": 9456, "task_loss": 0.28179535269737244 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5733225345611572, "epoch": 7.99, "learning_rate": 1.1143984220907299e-05, "loss": 0.525, "step": 9457, "task_loss": 0.6432082056999207 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5511621236801147, "epoch": 7.99, "learning_rate": 1.1139288062364985e-05, "loss": 0.6907, "step": 9458, "task_loss": 0.36878806352615356 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7930650115013123, "epoch": 8.0, "learning_rate": 1.1134591903822673e-05, "loss": 0.6613, "step": 9459, "task_loss": 0.45087382197380066 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.8984353542327881, "epoch": 8.0, "learning_rate": 1.1129895745280361e-05, "loss": 0.7457, "step": 9460, "task_loss": 1.0850975513458252 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.9980065226554871, "epoch": 8.0, "learning_rate": 1.112519958673805e-05, "loss": 0.5874, "step": 9461, "task_loss": 1.310005784034729 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7816933393478394, "epoch": 8.0, "learning_rate": 1.1120503428195736e-05, "loss": 0.6643, "step": 9462, "task_loss": 1.144230842590332 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.9402523636817932, "epoch": 8.0, "learning_rate": 1.1115807269653424e-05, "loss": 0.7737, "step": 9463, "task_loss": 1.4603557586669922 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.8404320478439331, "epoch": 8.0, "learning_rate": 1.1111111111111112e-05, "loss": 0.5656, "step": 9464, "task_loss": 0.6050480604171753 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5812202095985413, "epoch": 8.0, "learning_rate": 1.11064149525688e-05, "loss": 1.291, "step": 9465, "task_loss": 0.38076305389404297 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.41187962889671326, "epoch": 8.0, "learning_rate": 1.1101718794026488e-05, "loss": 0.4509, "step": 9466, "task_loss": 0.3408677279949188 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.4635920524597168, "epoch": 8.0, "learning_rate": 1.1097022635484176e-05, "loss": 0.7054, "step": 9467, "task_loss": 1.1829211711883545 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.1287435293197632, "epoch": 8.0, "learning_rate": 1.1092326476941862e-05, "loss": 0.7307, "step": 9468, "task_loss": 1.8518165349960327 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.27776604890823364, "epoch": 8.0, "learning_rate": 1.108763031839955e-05, "loss": 0.5168, "step": 9469, "task_loss": 0.12676112353801727 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.407720685005188, "epoch": 8.01, "learning_rate": 1.1082934159857238e-05, "loss": 0.5175, "step": 9470, "task_loss": 0.7011333703994751 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4935466945171356, "epoch": 8.01, "learning_rate": 1.1078238001314924e-05, "loss": 0.5673, "step": 9471, "task_loss": 0.504375159740448 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7150142192840576, "epoch": 8.01, "learning_rate": 1.1073541842772613e-05, "loss": 0.8321, "step": 9472, "task_loss": 0.6426998972892761 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5436955690383911, "epoch": 8.01, "learning_rate": 1.10688456842303e-05, "loss": 0.5863, "step": 9473, "task_loss": 1.0703023672103882 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5467393398284912, "epoch": 8.01, "learning_rate": 1.1064149525687987e-05, "loss": 0.8791, "step": 9474, "task_loss": 0.7123474478721619 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.562700092792511, "epoch": 8.01, "learning_rate": 1.1059453367145675e-05, "loss": 0.7148, "step": 9475, "task_loss": 1.1184792518615723 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5672982335090637, "epoch": 8.01, "learning_rate": 1.1054757208603363e-05, "loss": 0.6271, "step": 9476, "task_loss": 1.3319036960601807 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.49629098176956177, "epoch": 8.01, "learning_rate": 1.1050061050061051e-05, "loss": 0.6934, "step": 9477, "task_loss": 0.33800455927848816 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.9714409112930298, "epoch": 8.01, "learning_rate": 1.1045364891518737e-05, "loss": 0.6578, "step": 9478, "task_loss": 0.5665557384490967 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7250206470489502, "epoch": 8.01, "learning_rate": 1.1040668732976425e-05, "loss": 0.6071, "step": 9479, "task_loss": 0.6859468221664429 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7587070465087891, "epoch": 8.01, "learning_rate": 1.1035972574434113e-05, "loss": 0.7226, "step": 9480, "task_loss": 0.8831332325935364 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7198452353477478, "epoch": 8.01, "learning_rate": 1.1031276415891801e-05, "loss": 0.556, "step": 9481, "task_loss": 0.7043091058731079 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4940698444843292, "epoch": 8.02, "learning_rate": 1.102658025734949e-05, "loss": 0.7, "step": 9482, "task_loss": 0.7922248840332031 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7235041260719299, "epoch": 8.02, "learning_rate": 1.1021884098807178e-05, "loss": 0.6851, "step": 9483, "task_loss": 0.2755070924758911 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.3108081817626953, "epoch": 8.02, "learning_rate": 1.1017187940264864e-05, "loss": 0.3959, "step": 9484, "task_loss": 0.32028353214263916 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.704292893409729, "epoch": 8.02, "learning_rate": 1.1012491781722552e-05, "loss": 0.5865, "step": 9485, "task_loss": 0.3405129015445709 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7406017184257507, "epoch": 8.02, "learning_rate": 1.100779562318024e-05, "loss": 0.606, "step": 9486, "task_loss": 1.3421653509140015 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.47849640250205994, "epoch": 8.02, "learning_rate": 1.1003099464637926e-05, "loss": 0.6357, "step": 9487, "task_loss": 0.3793576955795288 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5242279767990112, "epoch": 8.02, "learning_rate": 1.0998403306095614e-05, "loss": 0.5402, "step": 9488, "task_loss": 0.5913048386573792 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.8360837697982788, "epoch": 8.02, "learning_rate": 1.0993707147553302e-05, "loss": 0.5956, "step": 9489, "task_loss": 1.5875080823898315 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6151258945465088, "epoch": 8.02, "learning_rate": 1.0989010989010989e-05, "loss": 0.666, "step": 9490, "task_loss": 0.9413996338844299 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.795143723487854, "epoch": 8.02, "learning_rate": 1.0984314830468677e-05, "loss": 0.6743, "step": 9491, "task_loss": 2.242873430252075 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.0498865842819214, "epoch": 8.02, "learning_rate": 1.0979618671926365e-05, "loss": 0.676, "step": 9492, "task_loss": 1.261332631111145 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5436716675758362, "epoch": 8.02, "learning_rate": 1.0974922513384053e-05, "loss": 0.7626, "step": 9493, "task_loss": 0.6913396120071411 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5174266695976257, "epoch": 8.03, "learning_rate": 1.0970226354841739e-05, "loss": 0.5278, "step": 9494, "task_loss": 0.11990638077259064 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6571503281593323, "epoch": 8.03, "learning_rate": 1.0965530196299427e-05, "loss": 0.6977, "step": 9495, "task_loss": 0.8220389485359192 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6781460642814636, "epoch": 8.03, "learning_rate": 1.0960834037757115e-05, "loss": 0.6446, "step": 9496, "task_loss": 0.45212236046791077 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.46204933524131775, "epoch": 8.03, "learning_rate": 1.0956137879214803e-05, "loss": 0.5897, "step": 9497, "task_loss": 0.5921857953071594 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6566625237464905, "epoch": 8.03, "learning_rate": 1.0951441720672491e-05, "loss": 0.5933, "step": 9498, "task_loss": 0.6926958560943604 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6501269340515137, "epoch": 8.03, "learning_rate": 1.094674556213018e-05, "loss": 0.4954, "step": 9499, "task_loss": 0.42471814155578613 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5690914392471313, "epoch": 8.03, "learning_rate": 1.0942049403587866e-05, "loss": 0.6174, "step": 9500, "task_loss": 0.7666858434677124 }, { "epoch": 8.03, "eval_accuracy": 0.9005148514851485, "eval_loss": 0.39518916606903076, "eval_runtime": 225.8436, "eval_samples_per_second": 111.803, "eval_steps_per_second": 0.877, "step": 9500 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7263021469116211, "epoch": 8.03, "learning_rate": 1.0937353245045554e-05, "loss": 0.5625, "step": 9501, "task_loss": 0.7129467725753784 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5475554466247559, "epoch": 8.03, "learning_rate": 1.0932657086503242e-05, "loss": 0.7051, "step": 9502, "task_loss": 1.0951508283615112 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4801509380340576, "epoch": 8.03, "learning_rate": 1.0927960927960928e-05, "loss": 0.5024, "step": 9503, "task_loss": 0.6476265788078308 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.993637204170227, "epoch": 8.03, "learning_rate": 1.0923264769418616e-05, "loss": 0.6417, "step": 9504, "task_loss": 1.204645037651062 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6532274484634399, "epoch": 8.03, "learning_rate": 1.0918568610876304e-05, "loss": 0.6253, "step": 9505, "task_loss": 0.7137248516082764 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5510648488998413, "epoch": 8.04, "learning_rate": 1.091387245233399e-05, "loss": 0.6376, "step": 9506, "task_loss": 0.5058783292770386 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5566229820251465, "epoch": 8.04, "learning_rate": 1.0909176293791678e-05, "loss": 0.6422, "step": 9507, "task_loss": 0.7207304835319519 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.129194974899292, "epoch": 8.04, "learning_rate": 1.0904480135249366e-05, "loss": 0.8346, "step": 9508, "task_loss": 1.364959716796875 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.46848469972610474, "epoch": 8.04, "learning_rate": 1.0899783976707053e-05, "loss": 0.5779, "step": 9509, "task_loss": 0.2549867630004883 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.8912129402160645, "epoch": 8.04, "learning_rate": 1.0895087818164741e-05, "loss": 0.712, "step": 9510, "task_loss": 1.5058979988098145 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.3895356059074402, "epoch": 8.04, "learning_rate": 1.0890391659622429e-05, "loss": 0.5678, "step": 9511, "task_loss": 0.8778238296508789 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.3521405756473541, "epoch": 8.04, "learning_rate": 1.0885695501080117e-05, "loss": 0.445, "step": 9512, "task_loss": 0.5191879272460938 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7175852060317993, "epoch": 8.04, "learning_rate": 1.0880999342537805e-05, "loss": 0.5306, "step": 9513, "task_loss": 1.2599470615386963 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.40497875213623047, "epoch": 8.04, "learning_rate": 1.0876303183995493e-05, "loss": 0.5988, "step": 9514, "task_loss": 0.7481816411018372 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7450413703918457, "epoch": 8.04, "learning_rate": 1.087160702545318e-05, "loss": 0.7329, "step": 9515, "task_loss": 1.0181057453155518 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.41929566860198975, "epoch": 8.04, "learning_rate": 1.0866910866910867e-05, "loss": 0.4814, "step": 9516, "task_loss": 0.6816752552986145 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.3803497552871704, "epoch": 8.04, "learning_rate": 1.0862214708368555e-05, "loss": 0.5811, "step": 9517, "task_loss": 0.42157575488090515 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7278377413749695, "epoch": 8.05, "learning_rate": 1.0857518549826243e-05, "loss": 0.6044, "step": 9518, "task_loss": 1.1790415048599243 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.44540244340896606, "epoch": 8.05, "learning_rate": 1.085282239128393e-05, "loss": 0.5197, "step": 9519, "task_loss": 0.8392729759216309 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7218919992446899, "epoch": 8.05, "learning_rate": 1.0848126232741618e-05, "loss": 0.558, "step": 9520, "task_loss": 0.27950525283813477 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5292428731918335, "epoch": 8.05, "learning_rate": 1.0843430074199306e-05, "loss": 0.6059, "step": 9521, "task_loss": 0.2518424987792969 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6718384623527527, "epoch": 8.05, "learning_rate": 1.0838733915656992e-05, "loss": 0.8361, "step": 9522, "task_loss": 0.615452229976654 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4190441370010376, "epoch": 8.05, "learning_rate": 1.083403775711468e-05, "loss": 0.6571, "step": 9523, "task_loss": 0.35550805926322937 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4793803095817566, "epoch": 8.05, "learning_rate": 1.0829341598572368e-05, "loss": 0.7197, "step": 9524, "task_loss": 0.4837338924407959 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4783594012260437, "epoch": 8.05, "learning_rate": 1.0824645440030055e-05, "loss": 0.5728, "step": 9525, "task_loss": 0.5893384218215942 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7351547479629517, "epoch": 8.05, "learning_rate": 1.0819949281487743e-05, "loss": 0.6201, "step": 9526, "task_loss": 0.3935568034648895 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7646112442016602, "epoch": 8.05, "learning_rate": 1.0815253122945432e-05, "loss": 0.5183, "step": 9527, "task_loss": 1.1995060443878174 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5851702690124512, "epoch": 8.05, "learning_rate": 1.0810556964403119e-05, "loss": 0.4713, "step": 9528, "task_loss": 0.7927724719047546 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7064776420593262, "epoch": 8.05, "learning_rate": 1.0805860805860807e-05, "loss": 0.6468, "step": 9529, "task_loss": 1.4219470024108887 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7205072641372681, "epoch": 8.06, "learning_rate": 1.0801164647318495e-05, "loss": 0.6581, "step": 9530, "task_loss": 0.4179346561431885 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.9075226783752441, "epoch": 8.06, "learning_rate": 1.0796468488776181e-05, "loss": 0.7322, "step": 9531, "task_loss": 0.47756433486938477 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.1874046325683594, "epoch": 8.06, "learning_rate": 1.0791772330233869e-05, "loss": 0.5494, "step": 9532, "task_loss": 0.40479543805122375 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.669325053691864, "epoch": 8.06, "learning_rate": 1.0787076171691557e-05, "loss": 0.6081, "step": 9533, "task_loss": 0.966395378112793 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4969741702079773, "epoch": 8.06, "learning_rate": 1.0782380013149245e-05, "loss": 0.6979, "step": 9534, "task_loss": 1.2446085214614868 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4613681435585022, "epoch": 8.06, "learning_rate": 1.0777683854606932e-05, "loss": 0.4495, "step": 9535, "task_loss": 0.2641541659832001 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.32833755016326904, "epoch": 8.06, "learning_rate": 1.077298769606462e-05, "loss": 0.5262, "step": 9536, "task_loss": 0.18245911598205566 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4848249554634094, "epoch": 8.06, "learning_rate": 1.0768291537522308e-05, "loss": 0.3762, "step": 9537, "task_loss": 0.5912644863128662 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7036850452423096, "epoch": 8.06, "learning_rate": 1.0763595378979994e-05, "loss": 0.4789, "step": 9538, "task_loss": 1.3571386337280273 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.843467652797699, "epoch": 8.06, "learning_rate": 1.0758899220437682e-05, "loss": 0.5502, "step": 9539, "task_loss": 1.5222222805023193 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.42739373445510864, "epoch": 8.06, "learning_rate": 1.075420306189537e-05, "loss": 0.6654, "step": 9540, "task_loss": 0.135125070810318 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.49046146869659424, "epoch": 8.07, "learning_rate": 1.0749506903353056e-05, "loss": 0.6236, "step": 9541, "task_loss": 0.7644829750061035 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.3661442697048187, "epoch": 8.07, "learning_rate": 1.0744810744810746e-05, "loss": 0.6882, "step": 9542, "task_loss": 0.9548665881156921 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.47083649039268494, "epoch": 8.07, "learning_rate": 1.0740114586268434e-05, "loss": 0.6607, "step": 9543, "task_loss": 0.3314148783683777 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5044400095939636, "epoch": 8.07, "learning_rate": 1.073541842772612e-05, "loss": 0.62, "step": 9544, "task_loss": 0.40416666865348816 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5955731868743896, "epoch": 8.07, "learning_rate": 1.0730722269183808e-05, "loss": 0.5837, "step": 9545, "task_loss": 0.4850604236125946 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.9529322385787964, "epoch": 8.07, "learning_rate": 1.0726026110641497e-05, "loss": 0.6436, "step": 9546, "task_loss": 1.1506404876708984 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6712486743927002, "epoch": 8.07, "learning_rate": 1.0721329952099183e-05, "loss": 0.792, "step": 9547, "task_loss": 0.5607008337974548 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.3535802960395813, "epoch": 8.07, "learning_rate": 1.0716633793556871e-05, "loss": 0.5941, "step": 9548, "task_loss": 0.30362826585769653 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.633469820022583, "epoch": 8.07, "learning_rate": 1.0711937635014559e-05, "loss": 0.649, "step": 9549, "task_loss": 0.999631941318512 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4113759398460388, "epoch": 8.07, "learning_rate": 1.0707241476472247e-05, "loss": 0.5333, "step": 9550, "task_loss": 0.04083705693483353 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7012148499488831, "epoch": 8.07, "learning_rate": 1.0702545317929933e-05, "loss": 0.5505, "step": 9551, "task_loss": 0.48087257146835327 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6547578573226929, "epoch": 8.07, "learning_rate": 1.0697849159387621e-05, "loss": 0.4577, "step": 9552, "task_loss": 0.4722820520401001 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5986226201057434, "epoch": 8.08, "learning_rate": 1.069315300084531e-05, "loss": 0.5541, "step": 9553, "task_loss": 0.39321789145469666 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.633654773235321, "epoch": 8.08, "learning_rate": 1.0688456842302996e-05, "loss": 0.6088, "step": 9554, "task_loss": 0.75005042552948 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.1990816593170166, "epoch": 8.08, "learning_rate": 1.0683760683760684e-05, "loss": 0.7673, "step": 9555, "task_loss": 1.1684298515319824 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.699962854385376, "epoch": 8.08, "learning_rate": 1.0679064525218372e-05, "loss": 0.5458, "step": 9556, "task_loss": 0.5113999247550964 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.43545788526535034, "epoch": 8.08, "learning_rate": 1.0674368366676058e-05, "loss": 0.522, "step": 9557, "task_loss": 0.41227102279663086 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.37495675683021545, "epoch": 8.08, "learning_rate": 1.0669672208133748e-05, "loss": 0.4859, "step": 9558, "task_loss": 0.13670139014720917 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5536849498748779, "epoch": 8.08, "learning_rate": 1.0664976049591436e-05, "loss": 0.6112, "step": 9559, "task_loss": 0.5380703210830688 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7537777423858643, "epoch": 8.08, "learning_rate": 1.0660279891049122e-05, "loss": 0.6979, "step": 9560, "task_loss": 1.063680648803711 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6131018400192261, "epoch": 8.08, "learning_rate": 1.065558373250681e-05, "loss": 0.5089, "step": 9561, "task_loss": 0.9307178854942322 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5359773635864258, "epoch": 8.08, "learning_rate": 1.0650887573964498e-05, "loss": 0.7115, "step": 9562, "task_loss": 0.4723987579345703 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.42610296607017517, "epoch": 8.08, "learning_rate": 1.0646191415422185e-05, "loss": 0.7135, "step": 9563, "task_loss": 1.3517779111862183 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5900095701217651, "epoch": 8.08, "learning_rate": 1.0641495256879873e-05, "loss": 0.6825, "step": 9564, "task_loss": 0.8574258089065552 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6208919286727905, "epoch": 8.09, "learning_rate": 1.063679909833756e-05, "loss": 0.662, "step": 9565, "task_loss": 0.9144099950790405 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5470881462097168, "epoch": 8.09, "learning_rate": 1.0632102939795249e-05, "loss": 0.464, "step": 9566, "task_loss": 0.2717429995536804 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7977765798568726, "epoch": 8.09, "learning_rate": 1.0627406781252935e-05, "loss": 0.697, "step": 9567, "task_loss": 1.1663471460342407 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6453373432159424, "epoch": 8.09, "learning_rate": 1.0622710622710623e-05, "loss": 0.4949, "step": 9568, "task_loss": 1.1563379764556885 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.917698323726654, "epoch": 8.09, "learning_rate": 1.0618014464168311e-05, "loss": 0.871, "step": 9569, "task_loss": 1.695438265800476 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4665974974632263, "epoch": 8.09, "learning_rate": 1.0613318305625997e-05, "loss": 0.6905, "step": 9570, "task_loss": 1.4889096021652222 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.812059223651886, "epoch": 8.09, "learning_rate": 1.0608622147083686e-05, "loss": 0.5566, "step": 9571, "task_loss": 1.3008863925933838 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5726824998855591, "epoch": 8.09, "learning_rate": 1.0603925988541374e-05, "loss": 0.5527, "step": 9572, "task_loss": 0.5701005458831787 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7214691638946533, "epoch": 8.09, "learning_rate": 1.0599229829999062e-05, "loss": 0.4372, "step": 9573, "task_loss": 1.3378514051437378 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.36166322231292725, "epoch": 8.09, "learning_rate": 1.059453367145675e-05, "loss": 0.6361, "step": 9574, "task_loss": 0.2553742825984955 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.2937757968902588, "epoch": 8.09, "learning_rate": 1.0589837512914438e-05, "loss": 0.779, "step": 9575, "task_loss": 1.1187267303466797 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5925964713096619, "epoch": 8.09, "learning_rate": 1.0585141354372124e-05, "loss": 0.6434, "step": 9576, "task_loss": 0.45923542976379395 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.44675207138061523, "epoch": 8.1, "learning_rate": 1.0580445195829812e-05, "loss": 0.4873, "step": 9577, "task_loss": 0.7855995297431946 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5561177730560303, "epoch": 8.1, "learning_rate": 1.05757490372875e-05, "loss": 0.5384, "step": 9578, "task_loss": 0.683715283870697 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.8120338320732117, "epoch": 8.1, "learning_rate": 1.0571052878745186e-05, "loss": 0.8039, "step": 9579, "task_loss": 0.8938063979148865 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7195050716400146, "epoch": 8.1, "learning_rate": 1.0566356720202874e-05, "loss": 0.515, "step": 9580, "task_loss": 0.35608747601509094 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.733194887638092, "epoch": 8.1, "learning_rate": 1.0561660561660562e-05, "loss": 0.5969, "step": 9581, "task_loss": 1.2638418674468994 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.3394720256328583, "epoch": 8.1, "learning_rate": 1.0556964403118249e-05, "loss": 0.4765, "step": 9582, "task_loss": 0.8286453485488892 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4973260760307312, "epoch": 8.1, "learning_rate": 1.0552268244575937e-05, "loss": 0.5202, "step": 9583, "task_loss": 0.8858888745307922 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7265393733978271, "epoch": 8.1, "learning_rate": 1.0547572086033625e-05, "loss": 0.7132, "step": 9584, "task_loss": 0.34723055362701416 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.23681239783763885, "epoch": 8.1, "learning_rate": 1.0542875927491313e-05, "loss": 0.5906, "step": 9585, "task_loss": 0.4672214984893799 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5347276926040649, "epoch": 8.1, "learning_rate": 1.0538179768949e-05, "loss": 0.6742, "step": 9586, "task_loss": 1.4647562503814697 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7493748664855957, "epoch": 8.1, "learning_rate": 1.0533483610406687e-05, "loss": 0.542, "step": 9587, "task_loss": 0.8830563426017761 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5776088237762451, "epoch": 8.1, "learning_rate": 1.0528787451864375e-05, "loss": 0.7206, "step": 9588, "task_loss": 1.0398075580596924 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6529967784881592, "epoch": 8.11, "learning_rate": 1.0524091293322063e-05, "loss": 0.7341, "step": 9589, "task_loss": 0.15972480177879333 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6593091487884521, "epoch": 8.11, "learning_rate": 1.0519395134779751e-05, "loss": 0.6464, "step": 9590, "task_loss": 0.7615033388137817 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4179069995880127, "epoch": 8.11, "learning_rate": 1.051469897623744e-05, "loss": 0.5787, "step": 9591, "task_loss": 0.5175882577896118 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6920779943466187, "epoch": 8.11, "learning_rate": 1.0510002817695126e-05, "loss": 0.6553, "step": 9592, "task_loss": 1.5671223402023315 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5582391023635864, "epoch": 8.11, "learning_rate": 1.0505306659152814e-05, "loss": 0.5605, "step": 9593, "task_loss": 0.9209933280944824 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.3108524978160858, "epoch": 8.11, "learning_rate": 1.0500610500610502e-05, "loss": 0.5813, "step": 9594, "task_loss": 0.39493125677108765 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7317145466804504, "epoch": 8.11, "learning_rate": 1.0495914342068188e-05, "loss": 0.6907, "step": 9595, "task_loss": 0.8386626839637756 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6233494281768799, "epoch": 8.11, "learning_rate": 1.0491218183525876e-05, "loss": 0.589, "step": 9596, "task_loss": 0.3758171498775482 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.431942880153656, "epoch": 8.11, "learning_rate": 1.0486522024983564e-05, "loss": 0.4371, "step": 9597, "task_loss": 0.44773709774017334 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7049449682235718, "epoch": 8.11, "learning_rate": 1.048182586644125e-05, "loss": 0.5808, "step": 9598, "task_loss": 0.33941659331321716 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.695440948009491, "epoch": 8.11, "learning_rate": 1.0477129707898939e-05, "loss": 0.5957, "step": 9599, "task_loss": 0.2897320091724396 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4686157703399658, "epoch": 8.11, "learning_rate": 1.0472433549356627e-05, "loss": 0.5629, "step": 9600, "task_loss": 0.8260859251022339 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5623164176940918, "epoch": 8.12, "learning_rate": 1.0467737390814315e-05, "loss": 0.6657, "step": 9601, "task_loss": 1.1238728761672974 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.47302550077438354, "epoch": 8.12, "learning_rate": 1.0463041232272001e-05, "loss": 0.4944, "step": 9602, "task_loss": 0.8181694149971008 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.8031943440437317, "epoch": 8.12, "learning_rate": 1.0458345073729689e-05, "loss": 0.9323, "step": 9603, "task_loss": 1.8125805854797363 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5306625962257385, "epoch": 8.12, "learning_rate": 1.0453648915187377e-05, "loss": 0.4966, "step": 9604, "task_loss": 0.25914791226387024 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.8246955871582031, "epoch": 8.12, "learning_rate": 1.0448952756645065e-05, "loss": 0.5744, "step": 9605, "task_loss": 0.9749420285224915 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.8620201945304871, "epoch": 8.12, "learning_rate": 1.0444256598102753e-05, "loss": 0.6665, "step": 9606, "task_loss": 0.5471687316894531 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.44490599632263184, "epoch": 8.12, "learning_rate": 1.0439560439560441e-05, "loss": 0.5803, "step": 9607, "task_loss": 0.4878123998641968 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.733820915222168, "epoch": 8.12, "learning_rate": 1.0434864281018128e-05, "loss": 0.6365, "step": 9608, "task_loss": 0.6441925764083862 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.36357760429382324, "epoch": 8.12, "learning_rate": 1.0430168122475816e-05, "loss": 0.5572, "step": 9609, "task_loss": 0.15739966928958893 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5703957080841064, "epoch": 8.12, "learning_rate": 1.0425471963933504e-05, "loss": 0.4975, "step": 9610, "task_loss": 1.3545721769332886 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7515392303466797, "epoch": 8.12, "learning_rate": 1.042077580539119e-05, "loss": 0.7212, "step": 9611, "task_loss": 0.6423227190971375 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.42978012561798096, "epoch": 8.13, "learning_rate": 1.0416079646848878e-05, "loss": 0.5565, "step": 9612, "task_loss": 0.265924870967865 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4373977780342102, "epoch": 8.13, "learning_rate": 1.0411383488306566e-05, "loss": 0.6478, "step": 9613, "task_loss": 0.6766417622566223 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5850145220756531, "epoch": 8.13, "learning_rate": 1.0406687329764252e-05, "loss": 0.5635, "step": 9614, "task_loss": 0.09358639270067215 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.0222697257995605, "epoch": 8.13, "learning_rate": 1.040199117122194e-05, "loss": 0.6646, "step": 9615, "task_loss": 0.7159797549247742 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4077756404876709, "epoch": 8.13, "learning_rate": 1.0397295012679628e-05, "loss": 0.4371, "step": 9616, "task_loss": 0.8624580502510071 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7308584451675415, "epoch": 8.13, "learning_rate": 1.0392598854137316e-05, "loss": 0.7253, "step": 9617, "task_loss": 0.6728371977806091 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4798913598060608, "epoch": 8.13, "learning_rate": 1.0387902695595003e-05, "loss": 0.5082, "step": 9618, "task_loss": 0.18188521265983582 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7289467453956604, "epoch": 8.13, "learning_rate": 1.0383206537052693e-05, "loss": 0.7303, "step": 9619, "task_loss": 1.0255823135375977 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.515568733215332, "epoch": 8.13, "learning_rate": 1.0378510378510379e-05, "loss": 0.5055, "step": 9620, "task_loss": 0.9581947326660156 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6622647643089294, "epoch": 8.13, "learning_rate": 1.0373814219968067e-05, "loss": 0.6572, "step": 9621, "task_loss": 1.3260321617126465 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6470175385475159, "epoch": 8.13, "learning_rate": 1.0369118061425755e-05, "loss": 0.6214, "step": 9622, "task_loss": 0.7599854469299316 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4801376461982727, "epoch": 8.13, "learning_rate": 1.0364421902883443e-05, "loss": 0.5218, "step": 9623, "task_loss": 0.4328024089336395 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.8643584251403809, "epoch": 8.14, "learning_rate": 1.035972574434113e-05, "loss": 0.5556, "step": 9624, "task_loss": 1.362401008605957 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5558688044548035, "epoch": 8.14, "learning_rate": 1.0355029585798817e-05, "loss": 0.6025, "step": 9625, "task_loss": 0.22913341224193573 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4091380834579468, "epoch": 8.14, "learning_rate": 1.0350333427256505e-05, "loss": 0.4728, "step": 9626, "task_loss": 0.18021298944950104 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.43584200739860535, "epoch": 8.14, "learning_rate": 1.0345637268714192e-05, "loss": 0.6078, "step": 9627, "task_loss": 0.9064238667488098 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.1762903332710266, "epoch": 8.14, "learning_rate": 1.034094111017188e-05, "loss": 0.5923, "step": 9628, "task_loss": 0.01924740895628929 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7627092599868774, "epoch": 8.14, "learning_rate": 1.0336244951629568e-05, "loss": 0.715, "step": 9629, "task_loss": 1.085907220840454 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.47081637382507324, "epoch": 8.14, "learning_rate": 1.0331548793087254e-05, "loss": 0.5698, "step": 9630, "task_loss": 0.5714925527572632 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.2772781252861023, "epoch": 8.14, "learning_rate": 1.0326852634544942e-05, "loss": 0.383, "step": 9631, "task_loss": 0.6393018960952759 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6902523636817932, "epoch": 8.14, "learning_rate": 1.032215647600263e-05, "loss": 0.5624, "step": 9632, "task_loss": 0.3776966333389282 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.20791667699813843, "epoch": 8.14, "learning_rate": 1.0317460317460318e-05, "loss": 0.48, "step": 9633, "task_loss": 0.14684775471687317 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7786867618560791, "epoch": 8.14, "learning_rate": 1.0312764158918005e-05, "loss": 0.7661, "step": 9634, "task_loss": 1.068207025527954 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5359975099563599, "epoch": 8.14, "learning_rate": 1.0308068000375694e-05, "loss": 0.535, "step": 9635, "task_loss": 0.7561880350112915 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.730118989944458, "epoch": 8.15, "learning_rate": 1.030337184183338e-05, "loss": 0.7372, "step": 9636, "task_loss": 0.40356141328811646 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.835820198059082, "epoch": 8.15, "learning_rate": 1.0298675683291069e-05, "loss": 0.6542, "step": 9637, "task_loss": 1.2195770740509033 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6262813806533813, "epoch": 8.15, "learning_rate": 1.0293979524748757e-05, "loss": 0.5806, "step": 9638, "task_loss": 0.836056649684906 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4134715795516968, "epoch": 8.15, "learning_rate": 1.0289283366206445e-05, "loss": 0.4763, "step": 9639, "task_loss": 1.5802605152130127 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.556692361831665, "epoch": 8.15, "learning_rate": 1.0284587207664131e-05, "loss": 0.7114, "step": 9640, "task_loss": 0.5530619025230408 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5864953994750977, "epoch": 8.15, "learning_rate": 1.0279891049121819e-05, "loss": 0.6382, "step": 9641, "task_loss": 0.8927907347679138 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5987105369567871, "epoch": 8.15, "learning_rate": 1.0275194890579507e-05, "loss": 0.617, "step": 9642, "task_loss": 0.6005358695983887 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.39652904868125916, "epoch": 8.15, "learning_rate": 1.0270498732037193e-05, "loss": 0.4898, "step": 9643, "task_loss": 1.6250534057617188 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7027376890182495, "epoch": 8.15, "learning_rate": 1.0265802573494881e-05, "loss": 0.4717, "step": 9644, "task_loss": 0.7919668555259705 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5486020445823669, "epoch": 8.15, "learning_rate": 1.026110641495257e-05, "loss": 0.6, "step": 9645, "task_loss": 0.6079891324043274 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4156404137611389, "epoch": 8.15, "learning_rate": 1.0256410256410256e-05, "loss": 0.5382, "step": 9646, "task_loss": 0.1482173204421997 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.35263124108314514, "epoch": 8.15, "learning_rate": 1.0251714097867944e-05, "loss": 0.6978, "step": 9647, "task_loss": 0.5375698208808899 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5329613089561462, "epoch": 8.16, "learning_rate": 1.0247017939325632e-05, "loss": 0.5681, "step": 9648, "task_loss": 1.024849772453308 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6611464023590088, "epoch": 8.16, "learning_rate": 1.0242321780783318e-05, "loss": 0.6055, "step": 9649, "task_loss": 0.10155653953552246 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6950768232345581, "epoch": 8.16, "learning_rate": 1.0237625622241008e-05, "loss": 0.5142, "step": 9650, "task_loss": 0.3393682539463043 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.32113179564476013, "epoch": 8.16, "learning_rate": 1.0232929463698696e-05, "loss": 0.5137, "step": 9651, "task_loss": 1.0396702289581299 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7534380555152893, "epoch": 8.16, "learning_rate": 1.0228233305156382e-05, "loss": 0.5778, "step": 9652, "task_loss": 0.4444790780544281 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.8492867946624756, "epoch": 8.16, "learning_rate": 1.022353714661407e-05, "loss": 0.6517, "step": 9653, "task_loss": 0.8703060150146484 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7189669609069824, "epoch": 8.16, "learning_rate": 1.0218840988071758e-05, "loss": 0.667, "step": 9654, "task_loss": 1.319606900215149 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6175704002380371, "epoch": 8.16, "learning_rate": 1.0214144829529446e-05, "loss": 0.5485, "step": 9655, "task_loss": 0.6523827314376831 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.2409095764160156, "epoch": 8.16, "learning_rate": 1.0209448670987133e-05, "loss": 0.7424, "step": 9656, "task_loss": 0.9884382486343384 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.3530313968658447, "epoch": 8.16, "learning_rate": 1.020475251244482e-05, "loss": 0.6038, "step": 9657, "task_loss": 0.48726996779441833 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.3822699189186096, "epoch": 8.16, "learning_rate": 1.0200056353902509e-05, "loss": 0.6504, "step": 9658, "task_loss": 0.11936576664447784 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5424160957336426, "epoch": 8.16, "learning_rate": 1.0195360195360195e-05, "loss": 0.6503, "step": 9659, "task_loss": 0.7380337715148926 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5378329157829285, "epoch": 8.17, "learning_rate": 1.0190664036817883e-05, "loss": 0.7351, "step": 9660, "task_loss": 1.6195659637451172 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5582855343818665, "epoch": 8.17, "learning_rate": 1.0185967878275571e-05, "loss": 0.6436, "step": 9661, "task_loss": 1.624598503112793 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.9149881601333618, "epoch": 8.17, "learning_rate": 1.0181271719733258e-05, "loss": 0.5696, "step": 9662, "task_loss": 1.2571210861206055 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4464848041534424, "epoch": 8.17, "learning_rate": 1.0176575561190946e-05, "loss": 0.5061, "step": 9663, "task_loss": 0.18248122930526733 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.571691632270813, "epoch": 8.17, "learning_rate": 1.0171879402648634e-05, "loss": 0.5615, "step": 9664, "task_loss": 1.149930477142334 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4990137219429016, "epoch": 8.17, "learning_rate": 1.016718324410632e-05, "loss": 0.6064, "step": 9665, "task_loss": 1.0181905031204224 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.477236807346344, "epoch": 8.17, "learning_rate": 1.016248708556401e-05, "loss": 0.5367, "step": 9666, "task_loss": 1.084006905555725 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6068460941314697, "epoch": 8.17, "learning_rate": 1.0157790927021698e-05, "loss": 0.5269, "step": 9667, "task_loss": 0.8979296088218689 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.450136661529541, "epoch": 8.17, "learning_rate": 1.0153094768479384e-05, "loss": 0.5861, "step": 9668, "task_loss": 0.442084401845932 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5763944983482361, "epoch": 8.17, "learning_rate": 1.0148398609937072e-05, "loss": 0.7372, "step": 9669, "task_loss": 0.5940802693367004 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.23386327922344208, "epoch": 8.17, "learning_rate": 1.014370245139476e-05, "loss": 0.4681, "step": 9670, "task_loss": 0.6548717617988586 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.43092072010040283, "epoch": 8.17, "learning_rate": 1.0139006292852447e-05, "loss": 0.7079, "step": 9671, "task_loss": 0.9332940578460693 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7766908407211304, "epoch": 8.18, "learning_rate": 1.0134310134310135e-05, "loss": 0.6419, "step": 9672, "task_loss": 1.0150963068008423 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.46505236625671387, "epoch": 8.18, "learning_rate": 1.0129613975767823e-05, "loss": 0.4794, "step": 9673, "task_loss": 0.6934946179389954 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.9029327630996704, "epoch": 8.18, "learning_rate": 1.012491781722551e-05, "loss": 0.5521, "step": 9674, "task_loss": 0.5102378726005554 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.366142600774765, "epoch": 8.18, "learning_rate": 1.0120221658683197e-05, "loss": 0.3938, "step": 9675, "task_loss": 0.9380089044570923 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5105211734771729, "epoch": 8.18, "learning_rate": 1.0115525500140885e-05, "loss": 0.8085, "step": 9676, "task_loss": 0.9707764387130737 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5969465970993042, "epoch": 8.18, "learning_rate": 1.0110829341598573e-05, "loss": 0.4342, "step": 9677, "task_loss": 0.48047658801078796 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4193398356437683, "epoch": 8.18, "learning_rate": 1.010613318305626e-05, "loss": 0.5674, "step": 9678, "task_loss": 0.8955860137939453 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4202116131782532, "epoch": 8.18, "learning_rate": 1.0101437024513947e-05, "loss": 0.5814, "step": 9679, "task_loss": 0.3602074682712555 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6812478303909302, "epoch": 8.18, "learning_rate": 1.0096740865971635e-05, "loss": 0.4915, "step": 9680, "task_loss": 0.14519837498664856 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.2566354274749756, "epoch": 8.18, "learning_rate": 1.0092044707429323e-05, "loss": 0.8063, "step": 9681, "task_loss": 0.8407101035118103 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.36252814531326294, "epoch": 8.18, "learning_rate": 1.0087348548887012e-05, "loss": 0.6194, "step": 9682, "task_loss": 0.3001737594604492 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.47411048412323, "epoch": 8.19, "learning_rate": 1.00826523903447e-05, "loss": 0.4649, "step": 9683, "task_loss": 0.5101187229156494 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.47575801610946655, "epoch": 8.19, "learning_rate": 1.0077956231802386e-05, "loss": 0.539, "step": 9684, "task_loss": 0.4000416398048401 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.49338841438293457, "epoch": 8.19, "learning_rate": 1.0073260073260074e-05, "loss": 0.4154, "step": 9685, "task_loss": 1.0669198036193848 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5545761585235596, "epoch": 8.19, "learning_rate": 1.0068563914717762e-05, "loss": 0.4591, "step": 9686, "task_loss": 0.8333578109741211 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7649699449539185, "epoch": 8.19, "learning_rate": 1.0063867756175448e-05, "loss": 0.5717, "step": 9687, "task_loss": 0.8900883793830872 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.291098415851593, "epoch": 8.19, "learning_rate": 1.0059171597633136e-05, "loss": 0.508, "step": 9688, "task_loss": 0.8146505355834961 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5616877675056458, "epoch": 8.19, "learning_rate": 1.0054475439090824e-05, "loss": 0.6619, "step": 9689, "task_loss": 0.32093581557273865 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.3483119010925293, "epoch": 8.19, "learning_rate": 1.0049779280548512e-05, "loss": 0.5512, "step": 9690, "task_loss": 0.4091796278953552 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7300522327423096, "epoch": 8.19, "learning_rate": 1.0045083122006199e-05, "loss": 0.654, "step": 9691, "task_loss": 0.897581934928894 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.8546731472015381, "epoch": 8.19, "learning_rate": 1.0040386963463887e-05, "loss": 0.7723, "step": 9692, "task_loss": 0.9717864394187927 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.42005980014801025, "epoch": 8.19, "learning_rate": 1.0035690804921575e-05, "loss": 0.5727, "step": 9693, "task_loss": 0.2694462835788727 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5674551725387573, "epoch": 8.19, "learning_rate": 1.0030994646379261e-05, "loss": 0.5771, "step": 9694, "task_loss": 0.4497809410095215 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6945803165435791, "epoch": 8.2, "learning_rate": 1.002629848783695e-05, "loss": 0.5494, "step": 9695, "task_loss": 2.1089043617248535 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7188489437103271, "epoch": 8.2, "learning_rate": 1.0021602329294639e-05, "loss": 0.6364, "step": 9696, "task_loss": 0.7523070573806763 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.2655196189880371, "epoch": 8.2, "learning_rate": 1.0016906170752325e-05, "loss": 0.6131, "step": 9697, "task_loss": 0.3392711579799652 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.3544926643371582, "epoch": 8.2, "learning_rate": 1.0012210012210013e-05, "loss": 0.4978, "step": 9698, "task_loss": 0.45918214321136475 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.40462374687194824, "epoch": 8.2, "learning_rate": 1.0007513853667701e-05, "loss": 0.4463, "step": 9699, "task_loss": 0.8340792059898376 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7607669830322266, "epoch": 8.2, "learning_rate": 1.0002817695125388e-05, "loss": 0.612, "step": 9700, "task_loss": 0.6583400964736938 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7745248079299927, "epoch": 8.2, "learning_rate": 9.998121536583076e-06, "loss": 0.5991, "step": 9701, "task_loss": 0.5977430939674377 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.557267427444458, "epoch": 8.2, "learning_rate": 9.993425378040764e-06, "loss": 0.6087, "step": 9702, "task_loss": 0.6721687316894531 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6215983629226685, "epoch": 8.2, "learning_rate": 9.98872921949845e-06, "loss": 0.622, "step": 9703, "task_loss": 0.8346000909805298 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6345351934432983, "epoch": 8.2, "learning_rate": 9.984033060956138e-06, "loss": 0.5236, "step": 9704, "task_loss": 1.017532467842102 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6238666772842407, "epoch": 8.2, "learning_rate": 9.979336902413826e-06, "loss": 0.6775, "step": 9705, "task_loss": 1.0134828090667725 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4055737853050232, "epoch": 8.2, "learning_rate": 9.974640743871514e-06, "loss": 0.6405, "step": 9706, "task_loss": 1.0618314743041992 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.3234702944755554, "epoch": 8.21, "learning_rate": 9.9699445853292e-06, "loss": 0.489, "step": 9707, "task_loss": 0.4082378149032593 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.35542815923690796, "epoch": 8.21, "learning_rate": 9.965248426786889e-06, "loss": 0.5287, "step": 9708, "task_loss": 0.22363245487213135 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4324008822441101, "epoch": 8.21, "learning_rate": 9.960552268244577e-06, "loss": 0.6214, "step": 9709, "task_loss": 0.7141844034194946 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.24309559166431427, "epoch": 8.21, "learning_rate": 9.955856109702263e-06, "loss": 0.5894, "step": 9710, "task_loss": 0.05265339836478233 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7792916297912598, "epoch": 8.21, "learning_rate": 9.951159951159951e-06, "loss": 0.5879, "step": 9711, "task_loss": 0.6979584693908691 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4068969488143921, "epoch": 8.21, "learning_rate": 9.94646379261764e-06, "loss": 0.4862, "step": 9712, "task_loss": 0.3067343533039093 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5120339393615723, "epoch": 8.21, "learning_rate": 9.941767634075327e-06, "loss": 0.8402, "step": 9713, "task_loss": 0.6233176589012146 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.43113771080970764, "epoch": 8.21, "learning_rate": 9.937071475533015e-06, "loss": 0.6232, "step": 9714, "task_loss": 0.5898086428642273 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.665696918964386, "epoch": 8.21, "learning_rate": 9.932375316990703e-06, "loss": 0.595, "step": 9715, "task_loss": 0.6677228808403015 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.2720430791378021, "epoch": 8.21, "learning_rate": 9.92767915844839e-06, "loss": 0.4782, "step": 9716, "task_loss": 0.2785189747810364 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.609301745891571, "epoch": 8.21, "learning_rate": 9.922982999906077e-06, "loss": 0.6168, "step": 9717, "task_loss": 0.6175473928451538 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5398749113082886, "epoch": 8.21, "learning_rate": 9.918286841363765e-06, "loss": 0.6454, "step": 9718, "task_loss": 0.8945716619491577 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.995951771736145, "epoch": 8.22, "learning_rate": 9.913590682821452e-06, "loss": 0.6411, "step": 9719, "task_loss": 1.336043357849121 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7726734280586243, "epoch": 8.22, "learning_rate": 9.90889452427914e-06, "loss": 0.5187, "step": 9720, "task_loss": 0.6701869368553162 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.562652587890625, "epoch": 8.22, "learning_rate": 9.904198365736828e-06, "loss": 0.6282, "step": 9721, "task_loss": 1.326302409172058 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5632829666137695, "epoch": 8.22, "learning_rate": 9.899502207194516e-06, "loss": 0.5057, "step": 9722, "task_loss": 0.7973195314407349 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.9584172368049622, "epoch": 8.22, "learning_rate": 9.894806048652202e-06, "loss": 0.6318, "step": 9723, "task_loss": 1.7145293951034546 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6352119445800781, "epoch": 8.22, "learning_rate": 9.89010989010989e-06, "loss": 0.5344, "step": 9724, "task_loss": 0.5287065505981445 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.499528706073761, "epoch": 8.22, "learning_rate": 9.885413731567578e-06, "loss": 0.5415, "step": 9725, "task_loss": 0.39163243770599365 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4306955337524414, "epoch": 8.22, "learning_rate": 9.880717573025265e-06, "loss": 0.3998, "step": 9726, "task_loss": 0.2651369869709015 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4944876432418823, "epoch": 8.22, "learning_rate": 9.876021414482954e-06, "loss": 0.4729, "step": 9727, "task_loss": 1.165845274925232 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6704347729682922, "epoch": 8.22, "learning_rate": 9.871325255940642e-06, "loss": 0.6356, "step": 9728, "task_loss": 1.2839113473892212 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.8214427828788757, "epoch": 8.22, "learning_rate": 9.866629097398329e-06, "loss": 0.7324, "step": 9729, "task_loss": 0.5895955562591553 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.20682567358016968, "epoch": 8.22, "learning_rate": 9.861932938856017e-06, "loss": 0.3554, "step": 9730, "task_loss": 0.5936257839202881 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4610383212566376, "epoch": 8.23, "learning_rate": 9.857236780313705e-06, "loss": 0.5255, "step": 9731, "task_loss": 0.6102650165557861 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.32398366928100586, "epoch": 8.23, "learning_rate": 9.852540621771391e-06, "loss": 0.4518, "step": 9732, "task_loss": 0.7745774984359741 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.445776104927063, "epoch": 8.23, "learning_rate": 9.84784446322908e-06, "loss": 0.6412, "step": 9733, "task_loss": 0.5203332901000977 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.573090136051178, "epoch": 8.23, "learning_rate": 9.843148304686767e-06, "loss": 0.5677, "step": 9734, "task_loss": 0.6419084668159485 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4735628068447113, "epoch": 8.23, "learning_rate": 9.838452146144454e-06, "loss": 0.5953, "step": 9735, "task_loss": 0.3241812586784363 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5336918234825134, "epoch": 8.23, "learning_rate": 9.833755987602142e-06, "loss": 0.7617, "step": 9736, "task_loss": 1.0960500240325928 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.40026795864105225, "epoch": 8.23, "learning_rate": 9.82905982905983e-06, "loss": 0.6389, "step": 9737, "task_loss": 0.36770427227020264 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5087677836418152, "epoch": 8.23, "learning_rate": 9.824363670517516e-06, "loss": 0.4718, "step": 9738, "task_loss": 1.3786020278930664 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.42891961336135864, "epoch": 8.23, "learning_rate": 9.819667511975204e-06, "loss": 0.4519, "step": 9739, "task_loss": 1.9564852714538574 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7476685047149658, "epoch": 8.23, "learning_rate": 9.814971353432892e-06, "loss": 0.6615, "step": 9740, "task_loss": 1.2797794342041016 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.49826616048812866, "epoch": 8.23, "learning_rate": 9.81027519489058e-06, "loss": 0.6383, "step": 9741, "task_loss": 0.5478835105895996 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.45795324444770813, "epoch": 8.23, "learning_rate": 9.805579036348266e-06, "loss": 0.4398, "step": 9742, "task_loss": 0.909438967704773 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6108505725860596, "epoch": 8.24, "learning_rate": 9.800882877805956e-06, "loss": 0.5077, "step": 9743, "task_loss": 1.0625921487808228 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.328755646944046, "epoch": 8.24, "learning_rate": 9.796186719263643e-06, "loss": 0.4961, "step": 9744, "task_loss": 0.2692311406135559 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.44167360663414, "epoch": 8.24, "learning_rate": 9.79149056072133e-06, "loss": 0.4351, "step": 9745, "task_loss": 0.9758546352386475 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5998272895812988, "epoch": 8.24, "learning_rate": 9.786794402179019e-06, "loss": 0.6467, "step": 9746, "task_loss": 0.7230185866355896 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6485182642936707, "epoch": 8.24, "learning_rate": 9.782098243636707e-06, "loss": 0.6114, "step": 9747, "task_loss": 0.7181310057640076 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5027868747711182, "epoch": 8.24, "learning_rate": 9.777402085094393e-06, "loss": 0.561, "step": 9748, "task_loss": 1.5743048191070557 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.272612065076828, "epoch": 8.24, "learning_rate": 9.772705926552081e-06, "loss": 0.4502, "step": 9749, "task_loss": 0.5890927910804749 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5301039218902588, "epoch": 8.24, "learning_rate": 9.768009768009769e-06, "loss": 0.4548, "step": 9750, "task_loss": 0.6430805325508118 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7540709972381592, "epoch": 8.24, "learning_rate": 9.763313609467455e-06, "loss": 0.7002, "step": 9751, "task_loss": 1.4772731065750122 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.3252166509628296, "epoch": 8.24, "learning_rate": 9.758617450925143e-06, "loss": 0.493, "step": 9752, "task_loss": 0.5621254444122314 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5571260452270508, "epoch": 8.24, "learning_rate": 9.753921292382831e-06, "loss": 0.5343, "step": 9753, "task_loss": 0.5988714694976807 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.38744351267814636, "epoch": 8.24, "learning_rate": 9.749225133840518e-06, "loss": 0.5826, "step": 9754, "task_loss": 0.2282389998435974 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4892951250076294, "epoch": 8.25, "learning_rate": 9.744528975298206e-06, "loss": 0.5114, "step": 9755, "task_loss": 1.217790961265564 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.523906946182251, "epoch": 8.25, "learning_rate": 9.739832816755894e-06, "loss": 0.529, "step": 9756, "task_loss": 0.8651268482208252 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5354322195053101, "epoch": 8.25, "learning_rate": 9.735136658213582e-06, "loss": 0.5072, "step": 9757, "task_loss": 1.184323787689209 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.42520955204963684, "epoch": 8.25, "learning_rate": 9.73044049967127e-06, "loss": 0.4346, "step": 9758, "task_loss": 0.5469807982444763 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.3210119605064392, "epoch": 8.25, "learning_rate": 9.725744341128958e-06, "loss": 0.4141, "step": 9759, "task_loss": 0.3511040210723877 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.29372698068618774, "epoch": 8.25, "learning_rate": 9.721048182586644e-06, "loss": 0.4049, "step": 9760, "task_loss": 0.18916447460651398 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5697832703590393, "epoch": 8.25, "learning_rate": 9.716352024044332e-06, "loss": 0.5522, "step": 9761, "task_loss": 1.0560085773468018 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7433333396911621, "epoch": 8.25, "learning_rate": 9.71165586550202e-06, "loss": 0.6382, "step": 9762, "task_loss": 0.6183957457542419 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4126461148262024, "epoch": 8.25, "learning_rate": 9.706959706959708e-06, "loss": 0.5105, "step": 9763, "task_loss": 0.6432529091835022 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4319184422492981, "epoch": 8.25, "learning_rate": 9.702263548417395e-06, "loss": 0.7527, "step": 9764, "task_loss": 0.5215303301811218 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.39973318576812744, "epoch": 8.25, "learning_rate": 9.697567389875083e-06, "loss": 0.4376, "step": 9765, "task_loss": 0.6656894087791443 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5883221626281738, "epoch": 8.26, "learning_rate": 9.69287123133277e-06, "loss": 0.7442, "step": 9766, "task_loss": 0.8088876008987427 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.40741658210754395, "epoch": 8.26, "learning_rate": 9.688175072790457e-06, "loss": 0.504, "step": 9767, "task_loss": 0.08677873015403748 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.712173342704773, "epoch": 8.26, "learning_rate": 9.683478914248145e-06, "loss": 0.6045, "step": 9768, "task_loss": 0.44167467951774597 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6513352990150452, "epoch": 8.26, "learning_rate": 9.678782755705833e-06, "loss": 0.7382, "step": 9769, "task_loss": 0.48553502559661865 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7241733074188232, "epoch": 8.26, "learning_rate": 9.67408659716352e-06, "loss": 0.7564, "step": 9770, "task_loss": 0.7730306386947632 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6585714817047119, "epoch": 8.26, "learning_rate": 9.669390438621208e-06, "loss": 0.6848, "step": 9771, "task_loss": 0.9087777733802795 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6228389143943787, "epoch": 8.26, "learning_rate": 9.664694280078896e-06, "loss": 0.6036, "step": 9772, "task_loss": 0.8109902739524841 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.33220309019088745, "epoch": 8.26, "learning_rate": 9.659998121536584e-06, "loss": 0.4374, "step": 9773, "task_loss": 0.10776631534099579 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6427100896835327, "epoch": 8.26, "learning_rate": 9.655301962994272e-06, "loss": 0.6378, "step": 9774, "task_loss": 0.683043360710144 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.3917993903160095, "epoch": 8.26, "learning_rate": 9.65060580445196e-06, "loss": 0.54, "step": 9775, "task_loss": 0.7201816439628601 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5498101711273193, "epoch": 8.26, "learning_rate": 9.645909645909646e-06, "loss": 0.4773, "step": 9776, "task_loss": 0.8182201981544495 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.25872287154197693, "epoch": 8.26, "learning_rate": 9.641213487367334e-06, "loss": 0.5072, "step": 9777, "task_loss": 0.514519214630127 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.51511549949646, "epoch": 8.27, "learning_rate": 9.636517328825022e-06, "loss": 0.5276, "step": 9778, "task_loss": 0.7080508470535278 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4420640170574188, "epoch": 8.27, "learning_rate": 9.63182117028271e-06, "loss": 0.6357, "step": 9779, "task_loss": 0.8679093718528748 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.27520108222961426, "epoch": 8.27, "learning_rate": 9.627125011740396e-06, "loss": 0.5053, "step": 9780, "task_loss": 0.05081824213266373 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5846917629241943, "epoch": 8.27, "learning_rate": 9.622428853198085e-06, "loss": 0.6561, "step": 9781, "task_loss": 0.25746241211891174 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.36217206716537476, "epoch": 8.27, "learning_rate": 9.617732694655773e-06, "loss": 0.5371, "step": 9782, "task_loss": 0.45610925555229187 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4579867124557495, "epoch": 8.27, "learning_rate": 9.613036536113459e-06, "loss": 0.5528, "step": 9783, "task_loss": 1.120659589767456 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.758806586265564, "epoch": 8.27, "learning_rate": 9.608340377571147e-06, "loss": 0.5567, "step": 9784, "task_loss": 1.1297321319580078 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.33021819591522217, "epoch": 8.27, "learning_rate": 9.603644219028835e-06, "loss": 0.639, "step": 9785, "task_loss": 0.7937184572219849 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5953812003135681, "epoch": 8.27, "learning_rate": 9.598948060486521e-06, "loss": 0.7165, "step": 9786, "task_loss": 0.6437649726867676 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6374558210372925, "epoch": 8.27, "learning_rate": 9.59425190194421e-06, "loss": 0.6016, "step": 9787, "task_loss": 0.7424291968345642 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.8017048835754395, "epoch": 8.27, "learning_rate": 9.589555743401897e-06, "loss": 0.6367, "step": 9788, "task_loss": 0.5568118691444397 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.8357959389686584, "epoch": 8.27, "learning_rate": 9.584859584859585e-06, "loss": 0.6543, "step": 9789, "task_loss": 0.9232833385467529 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6738321781158447, "epoch": 8.28, "learning_rate": 9.580163426317273e-06, "loss": 0.5385, "step": 9790, "task_loss": 0.7109473943710327 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4712334871292114, "epoch": 8.28, "learning_rate": 9.575467267774961e-06, "loss": 0.7312, "step": 9791, "task_loss": 1.2037276029586792 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6693501472473145, "epoch": 8.28, "learning_rate": 9.570771109232648e-06, "loss": 0.5202, "step": 9792, "task_loss": 2.1047422885894775 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.9694004654884338, "epoch": 8.28, "learning_rate": 9.566074950690336e-06, "loss": 0.6129, "step": 9793, "task_loss": 0.8538075685501099 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.43758994340896606, "epoch": 8.28, "learning_rate": 9.561378792148024e-06, "loss": 0.5287, "step": 9794, "task_loss": 0.6865754127502441 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6472204923629761, "epoch": 8.28, "learning_rate": 9.556682633605712e-06, "loss": 0.5687, "step": 9795, "task_loss": 1.2798367738723755 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.8585596084594727, "epoch": 8.28, "learning_rate": 9.551986475063398e-06, "loss": 0.8127, "step": 9796, "task_loss": 1.3665858507156372 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5408956408500671, "epoch": 8.28, "learning_rate": 9.547290316521086e-06, "loss": 0.5115, "step": 9797, "task_loss": 0.6298164129257202 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.424552857875824, "epoch": 8.28, "learning_rate": 9.542594157978774e-06, "loss": 0.3862, "step": 9798, "task_loss": 0.039411406964063644 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5289142727851868, "epoch": 8.28, "learning_rate": 9.53789799943646e-06, "loss": 0.5061, "step": 9799, "task_loss": 0.5404196381568909 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.129946231842041, "epoch": 8.28, "learning_rate": 9.533201840894149e-06, "loss": 0.6086, "step": 9800, "task_loss": 1.0877065658569336 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.3574545681476593, "epoch": 8.28, "learning_rate": 9.528505682351837e-06, "loss": 0.5131, "step": 9801, "task_loss": 0.9301601648330688 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5210535526275635, "epoch": 8.29, "learning_rate": 9.523809523809523e-06, "loss": 0.512, "step": 9802, "task_loss": 0.9841170310974121 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.44675412774086, "epoch": 8.29, "learning_rate": 9.519113365267211e-06, "loss": 0.5596, "step": 9803, "task_loss": 0.12356835603713989 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4505787789821625, "epoch": 8.29, "learning_rate": 9.5144172067249e-06, "loss": 0.513, "step": 9804, "task_loss": 0.4944281280040741 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.526336133480072, "epoch": 8.29, "learning_rate": 9.509721048182587e-06, "loss": 0.5685, "step": 9805, "task_loss": 0.44564059376716614 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5431277751922607, "epoch": 8.29, "learning_rate": 9.505024889640275e-06, "loss": 0.482, "step": 9806, "task_loss": 1.199747920036316 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.34640926122665405, "epoch": 8.29, "learning_rate": 9.500328731097963e-06, "loss": 0.6061, "step": 9807, "task_loss": 0.7217783331871033 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5892632007598877, "epoch": 8.29, "learning_rate": 9.49563257255565e-06, "loss": 0.4795, "step": 9808, "task_loss": 0.410609632730484 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5585726499557495, "epoch": 8.29, "learning_rate": 9.490936414013338e-06, "loss": 0.7405, "step": 9809, "task_loss": 1.2846821546554565 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7344493865966797, "epoch": 8.29, "learning_rate": 9.486240255471026e-06, "loss": 0.465, "step": 9810, "task_loss": 0.8095296025276184 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4598012864589691, "epoch": 8.29, "learning_rate": 9.481544096928712e-06, "loss": 0.4468, "step": 9811, "task_loss": 0.5539670586585999 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6377863883972168, "epoch": 8.29, "learning_rate": 9.4768479383864e-06, "loss": 0.5779, "step": 9812, "task_loss": 0.6633439064025879 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.29402726888656616, "epoch": 8.29, "learning_rate": 9.472151779844088e-06, "loss": 0.4129, "step": 9813, "task_loss": 0.21798260509967804 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4857614040374756, "epoch": 8.3, "learning_rate": 9.467455621301776e-06, "loss": 0.6912, "step": 9814, "task_loss": 0.5932385325431824 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5968766212463379, "epoch": 8.3, "learning_rate": 9.462759462759462e-06, "loss": 0.5771, "step": 9815, "task_loss": 0.247098907828331 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7497870326042175, "epoch": 8.3, "learning_rate": 9.45806330421715e-06, "loss": 0.5713, "step": 9816, "task_loss": 1.2362128496170044 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.552804172039032, "epoch": 8.3, "learning_rate": 9.453367145674838e-06, "loss": 0.609, "step": 9817, "task_loss": 0.1970565766096115 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.22541718184947968, "epoch": 8.3, "learning_rate": 9.448670987132525e-06, "loss": 0.4929, "step": 9818, "task_loss": 1.2011363506317139 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5659181475639343, "epoch": 8.3, "learning_rate": 9.443974828590213e-06, "loss": 0.6198, "step": 9819, "task_loss": 0.4159109890460968 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.23107323050498962, "epoch": 8.3, "learning_rate": 9.439278670047903e-06, "loss": 0.5945, "step": 9820, "task_loss": 0.8887677192687988 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7545822858810425, "epoch": 8.3, "learning_rate": 9.434582511505589e-06, "loss": 0.6004, "step": 9821, "task_loss": 1.0762418508529663 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6824984550476074, "epoch": 8.3, "learning_rate": 9.429886352963277e-06, "loss": 0.7768, "step": 9822, "task_loss": 1.316476821899414 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7736609578132629, "epoch": 8.3, "learning_rate": 9.425190194420965e-06, "loss": 0.6033, "step": 9823, "task_loss": 0.5471997857093811 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7312540411949158, "epoch": 8.3, "learning_rate": 9.420494035878651e-06, "loss": 0.549, "step": 9824, "task_loss": 0.6482584476470947 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.42153728008270264, "epoch": 8.3, "learning_rate": 9.41579787733634e-06, "loss": 0.5171, "step": 9825, "task_loss": 0.5863528847694397 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6940416693687439, "epoch": 8.31, "learning_rate": 9.411101718794027e-06, "loss": 0.573, "step": 9826, "task_loss": 0.5613134503364563 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5054451823234558, "epoch": 8.31, "learning_rate": 9.406405560251714e-06, "loss": 0.4822, "step": 9827, "task_loss": 0.2842039465904236 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6866984367370605, "epoch": 8.31, "learning_rate": 9.401709401709402e-06, "loss": 0.7226, "step": 9828, "task_loss": 1.023725152015686 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.3244076371192932, "epoch": 8.31, "learning_rate": 9.39701324316709e-06, "loss": 0.5121, "step": 9829, "task_loss": 0.550408124923706 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5420448184013367, "epoch": 8.31, "learning_rate": 9.392317084624778e-06, "loss": 0.4586, "step": 9830, "task_loss": 0.6780598163604736 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.32724741101264954, "epoch": 8.31, "learning_rate": 9.387620926082464e-06, "loss": 0.6314, "step": 9831, "task_loss": 0.6496874690055847 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.3571416735649109, "epoch": 8.31, "learning_rate": 9.382924767540152e-06, "loss": 0.3585, "step": 9832, "task_loss": 0.686508297920227 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.701358437538147, "epoch": 8.31, "learning_rate": 9.37822860899784e-06, "loss": 0.5044, "step": 9833, "task_loss": 0.7468867897987366 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5182033777236938, "epoch": 8.31, "learning_rate": 9.373532450455527e-06, "loss": 0.6396, "step": 9834, "task_loss": 0.3869730830192566 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.3870936334133148, "epoch": 8.31, "learning_rate": 9.368836291913216e-06, "loss": 0.5163, "step": 9835, "task_loss": 0.39969202876091003 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6727232933044434, "epoch": 8.31, "learning_rate": 9.364140133370904e-06, "loss": 0.6262, "step": 9836, "task_loss": 0.6602930426597595 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7136795520782471, "epoch": 8.32, "learning_rate": 9.35944397482859e-06, "loss": 0.6357, "step": 9837, "task_loss": 0.7449325919151306 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5230521559715271, "epoch": 8.32, "learning_rate": 9.354747816286279e-06, "loss": 0.6123, "step": 9838, "task_loss": 0.8682950735092163 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5134536027908325, "epoch": 8.32, "learning_rate": 9.350051657743967e-06, "loss": 0.5807, "step": 9839, "task_loss": 0.5762718915939331 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7198781371116638, "epoch": 8.32, "learning_rate": 9.345355499201653e-06, "loss": 0.6157, "step": 9840, "task_loss": 1.0532041788101196 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7158334851264954, "epoch": 8.32, "learning_rate": 9.340659340659341e-06, "loss": 0.4403, "step": 9841, "task_loss": 1.0027846097946167 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.46309924125671387, "epoch": 8.32, "learning_rate": 9.33596318211703e-06, "loss": 0.4605, "step": 9842, "task_loss": 0.4700089693069458 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.57912278175354, "epoch": 8.32, "learning_rate": 9.331267023574715e-06, "loss": 0.6323, "step": 9843, "task_loss": 0.8427464962005615 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.3779534101486206, "epoch": 8.32, "learning_rate": 9.326570865032404e-06, "loss": 0.4724, "step": 9844, "task_loss": 0.33405959606170654 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.35224449634552, "epoch": 8.32, "learning_rate": 9.321874706490092e-06, "loss": 0.7443, "step": 9845, "task_loss": 0.4259592294692993 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.47084662318229675, "epoch": 8.32, "learning_rate": 9.31717854794778e-06, "loss": 0.5708, "step": 9846, "task_loss": 0.9425650238990784 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7551038861274719, "epoch": 8.32, "learning_rate": 9.312482389405466e-06, "loss": 0.6328, "step": 9847, "task_loss": 0.5099654793739319 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4672972559928894, "epoch": 8.32, "learning_rate": 9.307786230863154e-06, "loss": 0.4261, "step": 9848, "task_loss": 0.5409045815467834 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.2891404628753662, "epoch": 8.33, "learning_rate": 9.303090072320842e-06, "loss": 0.7567, "step": 9849, "task_loss": 1.220738172531128 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.9625993371009827, "epoch": 8.33, "learning_rate": 9.298393913778528e-06, "loss": 0.5627, "step": 9850, "task_loss": 1.092787265777588 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5076558589935303, "epoch": 8.33, "learning_rate": 9.293697755236218e-06, "loss": 0.5747, "step": 9851, "task_loss": 0.4467574954032898 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4465334117412567, "epoch": 8.33, "learning_rate": 9.289001596693906e-06, "loss": 0.4568, "step": 9852, "task_loss": 0.4624335467815399 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.9595004320144653, "epoch": 8.33, "learning_rate": 9.284305438151592e-06, "loss": 0.7431, "step": 9853, "task_loss": 0.8254464864730835 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.8989959359169006, "epoch": 8.33, "learning_rate": 9.27960927960928e-06, "loss": 0.6264, "step": 9854, "task_loss": 1.0959153175354004 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6846670508384705, "epoch": 8.33, "learning_rate": 9.274913121066969e-06, "loss": 0.545, "step": 9855, "task_loss": 0.7316596508026123 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5249243974685669, "epoch": 8.33, "learning_rate": 9.270216962524655e-06, "loss": 0.485, "step": 9856, "task_loss": 0.15920794010162354 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.9239523410797119, "epoch": 8.33, "learning_rate": 9.265520803982343e-06, "loss": 0.5713, "step": 9857, "task_loss": 0.8075742721557617 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.410297155380249, "epoch": 8.33, "learning_rate": 9.260824645440031e-06, "loss": 0.5242, "step": 9858, "task_loss": 0.8583146929740906 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7461534738540649, "epoch": 8.33, "learning_rate": 9.256128486897717e-06, "loss": 0.5406, "step": 9859, "task_loss": 0.5877596139907837 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6504094004631042, "epoch": 8.33, "learning_rate": 9.251432328355405e-06, "loss": 0.6921, "step": 9860, "task_loss": 1.1524240970611572 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4042786657810211, "epoch": 8.34, "learning_rate": 9.246736169813093e-06, "loss": 0.5406, "step": 9861, "task_loss": 0.09624804556369781 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.3460994362831116, "epoch": 8.34, "learning_rate": 9.242040011270781e-06, "loss": 0.6901, "step": 9862, "task_loss": 0.6882423758506775 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6399083137512207, "epoch": 8.34, "learning_rate": 9.237343852728468e-06, "loss": 0.5531, "step": 9863, "task_loss": 0.6773644685745239 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5624182820320129, "epoch": 8.34, "learning_rate": 9.232647694186156e-06, "loss": 0.5945, "step": 9864, "task_loss": 0.47147059440612793 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.2829819917678833, "epoch": 8.34, "learning_rate": 9.227951535643844e-06, "loss": 0.4822, "step": 9865, "task_loss": 0.18068411946296692 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7106842398643494, "epoch": 8.34, "learning_rate": 9.223255377101532e-06, "loss": 0.6952, "step": 9866, "task_loss": 0.5687665343284607 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.444815993309021, "epoch": 8.34, "learning_rate": 9.21855921855922e-06, "loss": 0.5274, "step": 9867, "task_loss": 0.10060250014066696 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6980248689651489, "epoch": 8.34, "learning_rate": 9.213863060016908e-06, "loss": 0.5721, "step": 9868, "task_loss": 0.705528199672699 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.43110477924346924, "epoch": 8.34, "learning_rate": 9.209166901474594e-06, "loss": 0.4188, "step": 9869, "task_loss": 0.20876123011112213 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.3453638553619385, "epoch": 8.34, "learning_rate": 9.204470742932282e-06, "loss": 0.504, "step": 9870, "task_loss": 0.4599919021129608 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.32639408111572266, "epoch": 8.34, "learning_rate": 9.19977458438997e-06, "loss": 0.3325, "step": 9871, "task_loss": 0.9029172658920288 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6806925535202026, "epoch": 8.34, "learning_rate": 9.195078425847657e-06, "loss": 0.6599, "step": 9872, "task_loss": 0.5950111150741577 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5843950510025024, "epoch": 8.35, "learning_rate": 9.190382267305345e-06, "loss": 0.6397, "step": 9873, "task_loss": 1.0508968830108643 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.41063106060028076, "epoch": 8.35, "learning_rate": 9.185686108763033e-06, "loss": 0.7784, "step": 9874, "task_loss": 0.635343611240387 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7256381511688232, "epoch": 8.35, "learning_rate": 9.180989950220719e-06, "loss": 0.6194, "step": 9875, "task_loss": 2.0023581981658936 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5069853067398071, "epoch": 8.35, "learning_rate": 9.176293791678407e-06, "loss": 0.4845, "step": 9876, "task_loss": 0.10335472971200943 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.43046170473098755, "epoch": 8.35, "learning_rate": 9.171597633136095e-06, "loss": 0.5375, "step": 9877, "task_loss": 0.4157020151615143 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.533750057220459, "epoch": 8.35, "learning_rate": 9.166901474593783e-06, "loss": 0.4624, "step": 9878, "task_loss": 0.6177086234092712 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.38084644079208374, "epoch": 8.35, "learning_rate": 9.16220531605147e-06, "loss": 0.3739, "step": 9879, "task_loss": 0.1281488537788391 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.22021713852882385, "epoch": 8.35, "learning_rate": 9.157509157509158e-06, "loss": 0.4644, "step": 9880, "task_loss": 0.27022141218185425 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4078410565853119, "epoch": 8.35, "learning_rate": 9.152812998966846e-06, "loss": 0.5868, "step": 9881, "task_loss": 0.31821632385253906 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5515609979629517, "epoch": 8.35, "learning_rate": 9.148116840424534e-06, "loss": 0.4623, "step": 9882, "task_loss": 0.4838623106479645 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.8344786167144775, "epoch": 8.35, "learning_rate": 9.143420681882222e-06, "loss": 0.581, "step": 9883, "task_loss": 1.0338114500045776 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.602864146232605, "epoch": 8.35, "learning_rate": 9.13872452333991e-06, "loss": 0.6864, "step": 9884, "task_loss": 0.6836211085319519 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.44459831714630127, "epoch": 8.36, "learning_rate": 9.134028364797596e-06, "loss": 0.5839, "step": 9885, "task_loss": 0.7362571954727173 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.546707034111023, "epoch": 8.36, "learning_rate": 9.129332206255284e-06, "loss": 0.4967, "step": 9886, "task_loss": 0.5246952772140503 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7502166032791138, "epoch": 8.36, "learning_rate": 9.124636047712972e-06, "loss": 0.5536, "step": 9887, "task_loss": 1.1476266384124756 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6215810179710388, "epoch": 8.36, "learning_rate": 9.119939889170658e-06, "loss": 0.7639, "step": 9888, "task_loss": 0.9441189765930176 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7858877182006836, "epoch": 8.36, "learning_rate": 9.115243730628346e-06, "loss": 0.7488, "step": 9889, "task_loss": 0.33262619376182556 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4562780261039734, "epoch": 8.36, "learning_rate": 9.110547572086034e-06, "loss": 0.6376, "step": 9890, "task_loss": 0.5660277605056763 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4226152300834656, "epoch": 8.36, "learning_rate": 9.10585141354372e-06, "loss": 0.4095, "step": 9891, "task_loss": 0.3288862407207489 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.8253062963485718, "epoch": 8.36, "learning_rate": 9.101155255001409e-06, "loss": 0.6344, "step": 9892, "task_loss": 0.7457794547080994 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5540324449539185, "epoch": 8.36, "learning_rate": 9.096459096459097e-06, "loss": 0.5542, "step": 9893, "task_loss": 0.6821209788322449 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.41184282302856445, "epoch": 8.36, "learning_rate": 9.091762937916783e-06, "loss": 0.4721, "step": 9894, "task_loss": 0.49386656284332275 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5689901113510132, "epoch": 8.36, "learning_rate": 9.087066779374471e-06, "loss": 0.5003, "step": 9895, "task_loss": 0.5945345759391785 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.2851574718952179, "epoch": 8.36, "learning_rate": 9.08237062083216e-06, "loss": 0.5832, "step": 9896, "task_loss": 0.5732107758522034 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.2507196068763733, "epoch": 8.37, "learning_rate": 9.077674462289847e-06, "loss": 0.4895, "step": 9897, "task_loss": 0.691851794719696 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4416786730289459, "epoch": 8.37, "learning_rate": 9.072978303747535e-06, "loss": 0.7029, "step": 9898, "task_loss": 0.5714308023452759 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.9821386337280273, "epoch": 8.37, "learning_rate": 9.068282145205223e-06, "loss": 0.6547, "step": 9899, "task_loss": 0.3928053677082062 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6818063259124756, "epoch": 8.37, "learning_rate": 9.06358598666291e-06, "loss": 0.7906, "step": 9900, "task_loss": 1.0516163110733032 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.8048753142356873, "epoch": 8.37, "learning_rate": 9.058889828120598e-06, "loss": 0.6706, "step": 9901, "task_loss": 0.9235149025917053 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7325043082237244, "epoch": 8.37, "learning_rate": 9.054193669578286e-06, "loss": 0.7559, "step": 9902, "task_loss": 0.9463815689086914 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.39544016122817993, "epoch": 8.37, "learning_rate": 9.049497511035974e-06, "loss": 0.469, "step": 9903, "task_loss": 0.15386584401130676 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4801464080810547, "epoch": 8.37, "learning_rate": 9.04480135249366e-06, "loss": 0.5932, "step": 9904, "task_loss": 0.28420203924179077 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5612540245056152, "epoch": 8.37, "learning_rate": 9.040105193951348e-06, "loss": 0.5011, "step": 9905, "task_loss": 0.660342812538147 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.47385135293006897, "epoch": 8.37, "learning_rate": 9.035409035409036e-06, "loss": 0.5964, "step": 9906, "task_loss": 1.402239203453064 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4128328263759613, "epoch": 8.37, "learning_rate": 9.030712876866723e-06, "loss": 0.5044, "step": 9907, "task_loss": 0.3803658187389374 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4382241368293762, "epoch": 8.38, "learning_rate": 9.02601671832441e-06, "loss": 0.4267, "step": 9908, "task_loss": 0.8853191137313843 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.34184759855270386, "epoch": 8.38, "learning_rate": 9.021320559782099e-06, "loss": 0.4768, "step": 9909, "task_loss": 0.7819085717201233 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5643121004104614, "epoch": 8.38, "learning_rate": 9.016624401239785e-06, "loss": 0.5003, "step": 9910, "task_loss": 0.42143404483795166 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7201863527297974, "epoch": 8.38, "learning_rate": 9.011928242697473e-06, "loss": 0.5546, "step": 9911, "task_loss": 1.3284950256347656 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5839823484420776, "epoch": 8.38, "learning_rate": 9.007232084155163e-06, "loss": 0.5652, "step": 9912, "task_loss": 0.33166030049324036 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.38592734932899475, "epoch": 8.38, "learning_rate": 9.002535925612849e-06, "loss": 0.4959, "step": 9913, "task_loss": 1.1307443380355835 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.640953779220581, "epoch": 8.38, "learning_rate": 8.997839767070537e-06, "loss": 0.7092, "step": 9914, "task_loss": 0.7431163787841797 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.31718337535858154, "epoch": 8.38, "learning_rate": 8.993143608528225e-06, "loss": 0.4287, "step": 9915, "task_loss": 0.12039118260145187 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5788823962211609, "epoch": 8.38, "learning_rate": 8.988447449985911e-06, "loss": 0.5043, "step": 9916, "task_loss": 0.3690899610519409 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.3280255198478699, "epoch": 8.38, "learning_rate": 8.9837512914436e-06, "loss": 0.6785, "step": 9917, "task_loss": 0.3083937466144562 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.46402707695961, "epoch": 8.38, "learning_rate": 8.979055132901288e-06, "loss": 0.675, "step": 9918, "task_loss": 0.46206536889076233 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.9173400402069092, "epoch": 8.38, "learning_rate": 8.974358974358976e-06, "loss": 0.5719, "step": 9919, "task_loss": 1.5207709074020386 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7511264681816101, "epoch": 8.39, "learning_rate": 8.969662815816662e-06, "loss": 0.6149, "step": 9920, "task_loss": 1.3965877294540405 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5534853935241699, "epoch": 8.39, "learning_rate": 8.96496665727435e-06, "loss": 0.4452, "step": 9921, "task_loss": 0.5477822422981262 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.3337644636631012, "epoch": 8.39, "learning_rate": 8.960270498732038e-06, "loss": 0.6024, "step": 9922, "task_loss": 0.3655329644680023 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.3146843910217285, "epoch": 8.39, "learning_rate": 8.955574340189724e-06, "loss": 0.5224, "step": 9923, "task_loss": 0.2205556333065033 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.373489111661911, "epoch": 8.39, "learning_rate": 8.950878181647412e-06, "loss": 0.5001, "step": 9924, "task_loss": 0.5447070598602295 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.909636378288269, "epoch": 8.39, "learning_rate": 8.9461820231051e-06, "loss": 0.7326, "step": 9925, "task_loss": 0.8775274157524109 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4476146399974823, "epoch": 8.39, "learning_rate": 8.941485864562787e-06, "loss": 0.5477, "step": 9926, "task_loss": 0.972174346446991 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5034589767456055, "epoch": 8.39, "learning_rate": 8.936789706020475e-06, "loss": 0.6389, "step": 9927, "task_loss": 0.5106055736541748 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.43141257762908936, "epoch": 8.39, "learning_rate": 8.932093547478164e-06, "loss": 0.5481, "step": 9928, "task_loss": 0.8714046478271484 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4261969327926636, "epoch": 8.39, "learning_rate": 8.92739738893585e-06, "loss": 0.5818, "step": 9929, "task_loss": 0.08750349283218384 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7194413542747498, "epoch": 8.39, "learning_rate": 8.922701230393539e-06, "loss": 0.5487, "step": 9930, "task_loss": 0.5924075245857239 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6696881651878357, "epoch": 8.39, "learning_rate": 8.918005071851227e-06, "loss": 0.5661, "step": 9931, "task_loss": 0.5555426478385925 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5446509122848511, "epoch": 8.4, "learning_rate": 8.913308913308913e-06, "loss": 0.6505, "step": 9932, "task_loss": 0.372895210981369 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4515734314918518, "epoch": 8.4, "learning_rate": 8.908612754766601e-06, "loss": 0.5423, "step": 9933, "task_loss": 0.5833816528320312 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.2785947918891907, "epoch": 8.4, "learning_rate": 8.90391659622429e-06, "loss": 0.5032, "step": 9934, "task_loss": 0.4557320475578308 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.30781638622283936, "epoch": 8.4, "learning_rate": 8.899220437681977e-06, "loss": 0.466, "step": 9935, "task_loss": 0.4561280906200409 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.29733866453170776, "epoch": 8.4, "learning_rate": 8.894524279139664e-06, "loss": 0.4854, "step": 9936, "task_loss": 0.12015135586261749 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.46201521158218384, "epoch": 8.4, "learning_rate": 8.889828120597352e-06, "loss": 0.4627, "step": 9937, "task_loss": 0.6367649435997009 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6993468403816223, "epoch": 8.4, "learning_rate": 8.88513196205504e-06, "loss": 0.5773, "step": 9938, "task_loss": 0.6155622601509094 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4917026460170746, "epoch": 8.4, "learning_rate": 8.880435803512726e-06, "loss": 0.5569, "step": 9939, "task_loss": 0.34616437554359436 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5455585718154907, "epoch": 8.4, "learning_rate": 8.875739644970414e-06, "loss": 0.3921, "step": 9940, "task_loss": 0.39848724007606506 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5908718109130859, "epoch": 8.4, "learning_rate": 8.871043486428102e-06, "loss": 0.4618, "step": 9941, "task_loss": 0.7051546573638916 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.38425591588020325, "epoch": 8.4, "learning_rate": 8.866347327885788e-06, "loss": 0.623, "step": 9942, "task_loss": 0.6424261927604675 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4911694824695587, "epoch": 8.4, "learning_rate": 8.861651169343478e-06, "loss": 0.5353, "step": 9943, "task_loss": 0.3585168421268463 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.2849941551685333, "epoch": 8.41, "learning_rate": 8.856955010801166e-06, "loss": 0.4479, "step": 9944, "task_loss": 0.10542943328619003 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.47986310720443726, "epoch": 8.41, "learning_rate": 8.852258852258853e-06, "loss": 0.4976, "step": 9945, "task_loss": 0.374702513217926 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5915270447731018, "epoch": 8.41, "learning_rate": 8.84756269371654e-06, "loss": 0.7646, "step": 9946, "task_loss": 0.6874126195907593 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5676836371421814, "epoch": 8.41, "learning_rate": 8.842866535174229e-06, "loss": 0.6468, "step": 9947, "task_loss": 0.8975026607513428 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4295034408569336, "epoch": 8.41, "learning_rate": 8.838170376631915e-06, "loss": 0.7204, "step": 9948, "task_loss": 0.3749655783176422 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5758320689201355, "epoch": 8.41, "learning_rate": 8.833474218089603e-06, "loss": 0.5394, "step": 9949, "task_loss": 1.805088758468628 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.32513272762298584, "epoch": 8.41, "learning_rate": 8.828778059547291e-06, "loss": 0.4092, "step": 9950, "task_loss": 0.19630150496959686 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.9707795977592468, "epoch": 8.41, "learning_rate": 8.824081901004979e-06, "loss": 0.706, "step": 9951, "task_loss": 0.9660483002662659 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6217832565307617, "epoch": 8.41, "learning_rate": 8.819385742462665e-06, "loss": 0.4895, "step": 9952, "task_loss": 0.42119812965393066 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.3025924265384674, "epoch": 8.41, "learning_rate": 8.814689583920353e-06, "loss": 0.4691, "step": 9953, "task_loss": 0.5430800914764404 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5399686098098755, "epoch": 8.41, "learning_rate": 8.809993425378042e-06, "loss": 0.5621, "step": 9954, "task_loss": 1.139549970626831 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.2663201689720154, "epoch": 8.41, "learning_rate": 8.805297266835728e-06, "loss": 0.5196, "step": 9955, "task_loss": 0.9680528044700623 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6568381786346436, "epoch": 8.42, "learning_rate": 8.800601108293416e-06, "loss": 0.6026, "step": 9956, "task_loss": 1.3204718828201294 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5357798337936401, "epoch": 8.42, "learning_rate": 8.795904949751104e-06, "loss": 0.4536, "step": 9957, "task_loss": 0.42011573910713196 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.3213920593261719, "epoch": 8.42, "learning_rate": 8.791208791208792e-06, "loss": 0.4888, "step": 9958, "task_loss": 0.9737111926078796 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5421769618988037, "epoch": 8.42, "learning_rate": 8.78651263266648e-06, "loss": 0.5433, "step": 9959, "task_loss": 0.9650179743766785 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.20753741264343262, "epoch": 8.42, "learning_rate": 8.781816474124168e-06, "loss": 0.4986, "step": 9960, "task_loss": 0.39006805419921875 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5970101356506348, "epoch": 8.42, "learning_rate": 8.777120315581854e-06, "loss": 0.5135, "step": 9961, "task_loss": 0.3009363114833832 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6671257019042969, "epoch": 8.42, "learning_rate": 8.772424157039542e-06, "loss": 0.6101, "step": 9962, "task_loss": 1.0379154682159424 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.8711392879486084, "epoch": 8.42, "learning_rate": 8.76772799849723e-06, "loss": 0.5779, "step": 9963, "task_loss": 1.4534447193145752 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.763811469078064, "epoch": 8.42, "learning_rate": 8.763031839954917e-06, "loss": 0.8156, "step": 9964, "task_loss": 1.2084267139434814 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.3617333769798279, "epoch": 8.42, "learning_rate": 8.758335681412605e-06, "loss": 0.5307, "step": 9965, "task_loss": 0.9749069809913635 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7513591647148132, "epoch": 8.42, "learning_rate": 8.753639522870293e-06, "loss": 0.6691, "step": 9966, "task_loss": 0.6057542562484741 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.661767840385437, "epoch": 8.42, "learning_rate": 8.74894336432798e-06, "loss": 0.6703, "step": 9967, "task_loss": 0.678107738494873 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5381221175193787, "epoch": 8.43, "learning_rate": 8.744247205785667e-06, "loss": 0.7359, "step": 9968, "task_loss": 1.9459682703018188 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6304236650466919, "epoch": 8.43, "learning_rate": 8.739551047243355e-06, "loss": 0.4902, "step": 9969, "task_loss": 0.3533030152320862 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.367307186126709, "epoch": 8.43, "learning_rate": 8.734854888701043e-06, "loss": 0.5298, "step": 9970, "task_loss": 1.1322321891784668 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.27767831087112427, "epoch": 8.43, "learning_rate": 8.73015873015873e-06, "loss": 0.4663, "step": 9971, "task_loss": 0.13401243090629578 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.3136594891548157, "epoch": 8.43, "learning_rate": 8.725462571616418e-06, "loss": 0.5118, "step": 9972, "task_loss": 0.5296337008476257 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5264785885810852, "epoch": 8.43, "learning_rate": 8.720766413074106e-06, "loss": 0.6614, "step": 9973, "task_loss": 0.9530460238456726 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5578907132148743, "epoch": 8.43, "learning_rate": 8.716070254531794e-06, "loss": 0.5565, "step": 9974, "task_loss": 1.0055028200149536 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5714325904846191, "epoch": 8.43, "learning_rate": 8.711374095989482e-06, "loss": 0.5785, "step": 9975, "task_loss": 0.47915226221084595 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.49294513463974, "epoch": 8.43, "learning_rate": 8.70667793744717e-06, "loss": 0.5424, "step": 9976, "task_loss": 0.6642913818359375 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6583653688430786, "epoch": 8.43, "learning_rate": 8.701981778904856e-06, "loss": 0.6671, "step": 9977, "task_loss": 1.0205702781677246 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7613810300827026, "epoch": 8.43, "learning_rate": 8.697285620362544e-06, "loss": 0.5539, "step": 9978, "task_loss": 1.031827688217163 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5802471041679382, "epoch": 8.44, "learning_rate": 8.692589461820232e-06, "loss": 0.6252, "step": 9979, "task_loss": 1.8595950603485107 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7182286977767944, "epoch": 8.44, "learning_rate": 8.687893303277919e-06, "loss": 0.7738, "step": 9980, "task_loss": 1.348763108253479 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.42168959975242615, "epoch": 8.44, "learning_rate": 8.683197144735607e-06, "loss": 0.4288, "step": 9981, "task_loss": 0.4145384132862091 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6082565784454346, "epoch": 8.44, "learning_rate": 8.678500986193295e-06, "loss": 0.6529, "step": 9982, "task_loss": 1.3244729042053223 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.29717817902565, "epoch": 8.44, "learning_rate": 8.673804827650981e-06, "loss": 0.4815, "step": 9983, "task_loss": 0.387093186378479 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6100088357925415, "epoch": 8.44, "learning_rate": 8.669108669108669e-06, "loss": 0.6398, "step": 9984, "task_loss": 0.9132628440856934 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.47162073850631714, "epoch": 8.44, "learning_rate": 8.664412510566357e-06, "loss": 0.6393, "step": 9985, "task_loss": 0.19711558520793915 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5330845713615417, "epoch": 8.44, "learning_rate": 8.659716352024045e-06, "loss": 0.5794, "step": 9986, "task_loss": 0.6849272847175598 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.8227648735046387, "epoch": 8.44, "learning_rate": 8.655020193481731e-06, "loss": 0.6614, "step": 9987, "task_loss": 1.1872047185897827 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5226609706878662, "epoch": 8.44, "learning_rate": 8.65032403493942e-06, "loss": 0.6185, "step": 9988, "task_loss": 1.0937620401382446 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.42052704095840454, "epoch": 8.44, "learning_rate": 8.645627876397107e-06, "loss": 0.4709, "step": 9989, "task_loss": 0.5768736004829407 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5364763140678406, "epoch": 8.44, "learning_rate": 8.640931717854795e-06, "loss": 0.5172, "step": 9990, "task_loss": 0.8856743574142456 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7680165767669678, "epoch": 8.45, "learning_rate": 8.636235559312484e-06, "loss": 0.6657, "step": 9991, "task_loss": 0.682752251625061 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.9503989219665527, "epoch": 8.45, "learning_rate": 8.631539400770172e-06, "loss": 0.6391, "step": 9992, "task_loss": 0.8580178022384644 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.2980901300907135, "epoch": 8.45, "learning_rate": 8.626843242227858e-06, "loss": 0.5618, "step": 9993, "task_loss": 0.16682946681976318 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6299120187759399, "epoch": 8.45, "learning_rate": 8.622147083685546e-06, "loss": 0.5697, "step": 9994, "task_loss": 0.4846227765083313 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7331041097640991, "epoch": 8.45, "learning_rate": 8.617450925143234e-06, "loss": 0.5717, "step": 9995, "task_loss": 0.7894541621208191 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.2859881818294525, "epoch": 8.45, "learning_rate": 8.61275476660092e-06, "loss": 0.5429, "step": 9996, "task_loss": 0.28261926770210266 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5740426778793335, "epoch": 8.45, "learning_rate": 8.608058608058608e-06, "loss": 0.7179, "step": 9997, "task_loss": 1.1668803691864014 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.3266582190990448, "epoch": 8.45, "learning_rate": 8.603362449516296e-06, "loss": 0.4764, "step": 9998, "task_loss": 0.42472049593925476 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.9344483613967896, "epoch": 8.45, "learning_rate": 8.598666290973983e-06, "loss": 0.5965, "step": 9999, "task_loss": 1.1691269874572754 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4198623299598694, "epoch": 8.45, "learning_rate": 8.59397013243167e-06, "loss": 0.4921, "step": 10000, "task_loss": 0.4757460951805115 }, { "epoch": 8.45, "eval_accuracy": 0.9025742574257426, "eval_loss": 0.37651845812797546, "eval_runtime": 226.1659, "eval_samples_per_second": 111.644, "eval_steps_per_second": 0.875, "step": 10000 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.3017149567604065, "epoch": 8.45, "learning_rate": 8.589273973889359e-06, "loss": 0.4227, "step": 10001, "task_loss": 0.7430132627487183 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5091423392295837, "epoch": 8.45, "learning_rate": 8.584577815347047e-06, "loss": 0.4842, "step": 10002, "task_loss": 0.7173858880996704 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.38988274335861206, "epoch": 8.46, "learning_rate": 8.579881656804733e-06, "loss": 0.4981, "step": 10003, "task_loss": 0.5944151878356934 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.348987340927124, "epoch": 8.46, "learning_rate": 8.575185498262421e-06, "loss": 0.5361, "step": 10004, "task_loss": 0.27893880009651184 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5960767269134521, "epoch": 8.46, "learning_rate": 8.57048933972011e-06, "loss": 0.6611, "step": 10005, "task_loss": 0.3596271574497223 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.45892226696014404, "epoch": 8.46, "learning_rate": 8.565793181177797e-06, "loss": 0.4822, "step": 10006, "task_loss": 0.7849210500717163 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7698813080787659, "epoch": 8.46, "learning_rate": 8.561097022635485e-06, "loss": 0.7169, "step": 10007, "task_loss": 0.26602986454963684 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.46376562118530273, "epoch": 8.46, "learning_rate": 8.556400864093173e-06, "loss": 0.5563, "step": 10008, "task_loss": 0.5032181739807129 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.43545717000961304, "epoch": 8.46, "learning_rate": 8.55170470555086e-06, "loss": 0.4423, "step": 10009, "task_loss": 0.3387356698513031 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4640302062034607, "epoch": 8.46, "learning_rate": 8.547008547008548e-06, "loss": 0.5529, "step": 10010, "task_loss": 0.6506044268608093 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5258544683456421, "epoch": 8.46, "learning_rate": 8.542312388466236e-06, "loss": 0.6224, "step": 10011, "task_loss": 0.5733548402786255 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.9912153482437134, "epoch": 8.46, "learning_rate": 8.537616229923922e-06, "loss": 0.7333, "step": 10012, "task_loss": 1.0554307699203491 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5356686115264893, "epoch": 8.46, "learning_rate": 8.53292007138161e-06, "loss": 0.4813, "step": 10013, "task_loss": 1.0235570669174194 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.47651442885398865, "epoch": 8.46, "learning_rate": 8.528223912839298e-06, "loss": 0.4859, "step": 10014, "task_loss": 1.3161373138427734 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.35693198442459106, "epoch": 8.47, "learning_rate": 8.523527754296984e-06, "loss": 0.5586, "step": 10015, "task_loss": 0.4627190828323364 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.8183948993682861, "epoch": 8.47, "learning_rate": 8.518831595754672e-06, "loss": 0.6027, "step": 10016, "task_loss": 0.2677037715911865 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5472632646560669, "epoch": 8.47, "learning_rate": 8.51413543721236e-06, "loss": 0.6024, "step": 10017, "task_loss": 0.48591431975364685 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.630576491355896, "epoch": 8.47, "learning_rate": 8.509439278670049e-06, "loss": 0.6012, "step": 10018, "task_loss": 0.442399263381958 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.33686092495918274, "epoch": 8.47, "learning_rate": 8.504743120127735e-06, "loss": 0.4132, "step": 10019, "task_loss": 0.2704259157180786 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.617949903011322, "epoch": 8.47, "learning_rate": 8.500046961585425e-06, "loss": 0.636, "step": 10020, "task_loss": 0.15515285730361938 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6718248724937439, "epoch": 8.47, "learning_rate": 8.495350803043111e-06, "loss": 0.4546, "step": 10021, "task_loss": 0.28392261266708374 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.31976184248924255, "epoch": 8.47, "learning_rate": 8.490654644500799e-06, "loss": 0.5125, "step": 10022, "task_loss": 0.6169281005859375 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7102287411689758, "epoch": 8.47, "learning_rate": 8.485958485958487e-06, "loss": 0.5228, "step": 10023, "task_loss": 1.1593163013458252 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4569820165634155, "epoch": 8.47, "learning_rate": 8.481262327416175e-06, "loss": 0.7145, "step": 10024, "task_loss": 0.6015130877494812 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4448516368865967, "epoch": 8.47, "learning_rate": 8.476566168873861e-06, "loss": 0.5036, "step": 10025, "task_loss": 0.8866956233978271 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.9973589777946472, "epoch": 8.47, "learning_rate": 8.47187001033155e-06, "loss": 0.6455, "step": 10026, "task_loss": 0.42801979184150696 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6565727591514587, "epoch": 8.48, "learning_rate": 8.467173851789237e-06, "loss": 0.575, "step": 10027, "task_loss": 0.7306531667709351 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.520441472530365, "epoch": 8.48, "learning_rate": 8.462477693246924e-06, "loss": 0.5139, "step": 10028, "task_loss": 0.6556795239448547 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6460514068603516, "epoch": 8.48, "learning_rate": 8.457781534704612e-06, "loss": 0.7458, "step": 10029, "task_loss": 0.8729152679443359 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4478207230567932, "epoch": 8.48, "learning_rate": 8.4530853761623e-06, "loss": 0.5118, "step": 10030, "task_loss": 0.461703360080719 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7045949697494507, "epoch": 8.48, "learning_rate": 8.448389217619986e-06, "loss": 0.5852, "step": 10031, "task_loss": 1.1685397624969482 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.43410611152648926, "epoch": 8.48, "learning_rate": 8.443693059077674e-06, "loss": 0.5269, "step": 10032, "task_loss": 0.7885587811470032 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7907038331031799, "epoch": 8.48, "learning_rate": 8.438996900535362e-06, "loss": 0.6419, "step": 10033, "task_loss": 0.4274558424949646 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.31654179096221924, "epoch": 8.48, "learning_rate": 8.43430074199305e-06, "loss": 0.578, "step": 10034, "task_loss": 0.5383250713348389 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.44962847232818604, "epoch": 8.48, "learning_rate": 8.429604583450738e-06, "loss": 0.6329, "step": 10035, "task_loss": 0.593730628490448 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5202362537384033, "epoch": 8.48, "learning_rate": 8.424908424908426e-06, "loss": 0.4515, "step": 10036, "task_loss": 0.7812157869338989 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4264167547225952, "epoch": 8.48, "learning_rate": 8.420212266366113e-06, "loss": 0.5212, "step": 10037, "task_loss": 0.8531744480133057 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.9380546808242798, "epoch": 8.48, "learning_rate": 8.4155161078238e-06, "loss": 0.6167, "step": 10038, "task_loss": 0.6545056700706482 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6099013090133667, "epoch": 8.49, "learning_rate": 8.410819949281489e-06, "loss": 0.529, "step": 10039, "task_loss": 1.4963222742080688 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6342105269432068, "epoch": 8.49, "learning_rate": 8.406123790739177e-06, "loss": 0.5504, "step": 10040, "task_loss": 0.5106306672096252 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5394735932350159, "epoch": 8.49, "learning_rate": 8.401427632196863e-06, "loss": 0.5927, "step": 10041, "task_loss": 0.3521052300930023 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.942297101020813, "epoch": 8.49, "learning_rate": 8.396731473654551e-06, "loss": 0.752, "step": 10042, "task_loss": 1.205306053161621 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5761298537254333, "epoch": 8.49, "learning_rate": 8.39203531511224e-06, "loss": 0.6953, "step": 10043, "task_loss": 1.1925389766693115 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6496156454086304, "epoch": 8.49, "learning_rate": 8.387339156569926e-06, "loss": 0.5735, "step": 10044, "task_loss": 0.939959704875946 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.9007594585418701, "epoch": 8.49, "learning_rate": 8.382642998027614e-06, "loss": 0.767, "step": 10045, "task_loss": 0.8220113515853882 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7251944541931152, "epoch": 8.49, "learning_rate": 8.377946839485302e-06, "loss": 0.5073, "step": 10046, "task_loss": 1.5946019887924194 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.9293487071990967, "epoch": 8.49, "learning_rate": 8.373250680942988e-06, "loss": 0.5007, "step": 10047, "task_loss": 0.633931577205658 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5057868957519531, "epoch": 8.49, "learning_rate": 8.368554522400676e-06, "loss": 0.5622, "step": 10048, "task_loss": 0.7661393284797668 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.378428190946579, "epoch": 8.49, "learning_rate": 8.363858363858364e-06, "loss": 0.5257, "step": 10049, "task_loss": 0.279744952917099 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7516874074935913, "epoch": 8.5, "learning_rate": 8.35916220531605e-06, "loss": 0.6709, "step": 10050, "task_loss": 0.2894376218318939 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.40842193365097046, "epoch": 8.5, "learning_rate": 8.35446604677374e-06, "loss": 0.4575, "step": 10051, "task_loss": 0.13369153439998627 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6151243448257446, "epoch": 8.5, "learning_rate": 8.349769888231428e-06, "loss": 0.6891, "step": 10052, "task_loss": 0.6247830986976624 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5475522875785828, "epoch": 8.5, "learning_rate": 8.345073729689114e-06, "loss": 0.5901, "step": 10053, "task_loss": 1.1218715906143188 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7991716861724854, "epoch": 8.5, "learning_rate": 8.340377571146803e-06, "loss": 0.6654, "step": 10054, "task_loss": 0.6498648524284363 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7038369178771973, "epoch": 8.5, "learning_rate": 8.33568141260449e-06, "loss": 0.5413, "step": 10055, "task_loss": 0.2541882395744324 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.8381092548370361, "epoch": 8.5, "learning_rate": 8.330985254062177e-06, "loss": 0.6499, "step": 10056, "task_loss": 1.004915714263916 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5427372455596924, "epoch": 8.5, "learning_rate": 8.326289095519865e-06, "loss": 0.5748, "step": 10057, "task_loss": 1.2405643463134766 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.45553454756736755, "epoch": 8.5, "learning_rate": 8.321592936977553e-06, "loss": 0.6636, "step": 10058, "task_loss": 0.754409909248352 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5653579235076904, "epoch": 8.5, "learning_rate": 8.316896778435241e-06, "loss": 0.508, "step": 10059, "task_loss": 1.033448576927185 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.740765392780304, "epoch": 8.5, "learning_rate": 8.312200619892927e-06, "loss": 0.5506, "step": 10060, "task_loss": 0.3072812259197235 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5614935159683228, "epoch": 8.5, "learning_rate": 8.307504461350615e-06, "loss": 0.686, "step": 10061, "task_loss": 1.119915246963501 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4209464192390442, "epoch": 8.51, "learning_rate": 8.302808302808303e-06, "loss": 0.4828, "step": 10062, "task_loss": 0.6324139833450317 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4343408942222595, "epoch": 8.51, "learning_rate": 8.29811214426599e-06, "loss": 0.4699, "step": 10063, "task_loss": 0.14182300865650177 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.44105952978134155, "epoch": 8.51, "learning_rate": 8.293415985723678e-06, "loss": 0.5473, "step": 10064, "task_loss": 0.4118945598602295 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.8978383541107178, "epoch": 8.51, "learning_rate": 8.288719827181366e-06, "loss": 0.6892, "step": 10065, "task_loss": 1.1369953155517578 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.27194952964782715, "epoch": 8.51, "learning_rate": 8.284023668639054e-06, "loss": 0.6357, "step": 10066, "task_loss": 0.3206271827220917 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4990245997905731, "epoch": 8.51, "learning_rate": 8.279327510096742e-06, "loss": 0.7337, "step": 10067, "task_loss": 0.34128084778785706 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.512772798538208, "epoch": 8.51, "learning_rate": 8.27463135155443e-06, "loss": 0.6719, "step": 10068, "task_loss": 0.600698709487915 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.31321442127227783, "epoch": 8.51, "learning_rate": 8.269935193012116e-06, "loss": 0.4469, "step": 10069, "task_loss": 0.5037088394165039 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.3567098081111908, "epoch": 8.51, "learning_rate": 8.265239034469804e-06, "loss": 0.452, "step": 10070, "task_loss": 0.938822329044342 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4847944378852844, "epoch": 8.51, "learning_rate": 8.260542875927492e-06, "loss": 0.5789, "step": 10071, "task_loss": 0.44490042328834534 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5847632884979248, "epoch": 8.51, "learning_rate": 8.255846717385179e-06, "loss": 0.5013, "step": 10072, "task_loss": 0.9204127788543701 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.9299798011779785, "epoch": 8.51, "learning_rate": 8.251150558842867e-06, "loss": 0.598, "step": 10073, "task_loss": 0.9591830372810364 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6519808769226074, "epoch": 8.52, "learning_rate": 8.246454400300555e-06, "loss": 0.6216, "step": 10074, "task_loss": 0.8872933387756348 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.3964068293571472, "epoch": 8.52, "learning_rate": 8.241758241758243e-06, "loss": 0.5555, "step": 10075, "task_loss": 0.7764999866485596 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4139261841773987, "epoch": 8.52, "learning_rate": 8.237062083215929e-06, "loss": 0.699, "step": 10076, "task_loss": 0.8838322162628174 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.9584897756576538, "epoch": 8.52, "learning_rate": 8.232365924673617e-06, "loss": 0.7196, "step": 10077, "task_loss": 0.9143103957176208 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5160485506057739, "epoch": 8.52, "learning_rate": 8.227669766131305e-06, "loss": 0.5718, "step": 10078, "task_loss": 0.2763226330280304 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.0263718366622925, "epoch": 8.52, "learning_rate": 8.222973607588992e-06, "loss": 0.8316, "step": 10079, "task_loss": 0.9081021547317505 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7080621719360352, "epoch": 8.52, "learning_rate": 8.21827744904668e-06, "loss": 0.4945, "step": 10080, "task_loss": 1.4055330753326416 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6522591710090637, "epoch": 8.52, "learning_rate": 8.213581290504368e-06, "loss": 0.5695, "step": 10081, "task_loss": 0.3538375496864319 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.3671731948852539, "epoch": 8.52, "learning_rate": 8.208885131962056e-06, "loss": 0.6089, "step": 10082, "task_loss": 0.9851669073104858 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5110629200935364, "epoch": 8.52, "learning_rate": 8.204188973419744e-06, "loss": 0.706, "step": 10083, "task_loss": 0.8025466203689575 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6208304166793823, "epoch": 8.52, "learning_rate": 8.199492814877432e-06, "loss": 0.7029, "step": 10084, "task_loss": 0.7408548593521118 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5281844735145569, "epoch": 8.52, "learning_rate": 8.194796656335118e-06, "loss": 0.6207, "step": 10085, "task_loss": 0.10437336564064026 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7888798713684082, "epoch": 8.53, "learning_rate": 8.190100497792806e-06, "loss": 0.7803, "step": 10086, "task_loss": 1.1193461418151855 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.41526296734809875, "epoch": 8.53, "learning_rate": 8.185404339250494e-06, "loss": 0.6, "step": 10087, "task_loss": 0.5470412373542786 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6257859468460083, "epoch": 8.53, "learning_rate": 8.18070818070818e-06, "loss": 0.5082, "step": 10088, "task_loss": 1.1351027488708496 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.3701359033584595, "epoch": 8.53, "learning_rate": 8.176012022165868e-06, "loss": 0.4464, "step": 10089, "task_loss": 1.101163387298584 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5960867404937744, "epoch": 8.53, "learning_rate": 8.171315863623556e-06, "loss": 0.5912, "step": 10090, "task_loss": 0.7456415891647339 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4089732766151428, "epoch": 8.53, "learning_rate": 8.166619705081245e-06, "loss": 0.5515, "step": 10091, "task_loss": 1.0943807363510132 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5639589428901672, "epoch": 8.53, "learning_rate": 8.161923546538931e-06, "loss": 0.6531, "step": 10092, "task_loss": 0.4710702896118164 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7174573540687561, "epoch": 8.53, "learning_rate": 8.157227387996619e-06, "loss": 0.4588, "step": 10093, "task_loss": 0.32601064443588257 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.30598363280296326, "epoch": 8.53, "learning_rate": 8.152531229454307e-06, "loss": 0.5002, "step": 10094, "task_loss": 0.17186155915260315 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.42604756355285645, "epoch": 8.53, "learning_rate": 8.147835070911993e-06, "loss": 0.4912, "step": 10095, "task_loss": 0.2708716094493866 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5390046238899231, "epoch": 8.53, "learning_rate": 8.143138912369681e-06, "loss": 0.5562, "step": 10096, "task_loss": 0.38354775309562683 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.43524056673049927, "epoch": 8.53, "learning_rate": 8.138442753827371e-06, "loss": 0.5257, "step": 10097, "task_loss": 0.6912310719490051 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7595468163490295, "epoch": 8.54, "learning_rate": 8.133746595285057e-06, "loss": 0.4796, "step": 10098, "task_loss": 1.2664991617202759 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.37137699127197266, "epoch": 8.54, "learning_rate": 8.129050436742745e-06, "loss": 0.5173, "step": 10099, "task_loss": 0.7199193835258484 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.3278194069862366, "epoch": 8.54, "learning_rate": 8.124354278200433e-06, "loss": 0.3952, "step": 10100, "task_loss": 0.10992012172937393 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.594543993473053, "epoch": 8.54, "learning_rate": 8.11965811965812e-06, "loss": 0.5559, "step": 10101, "task_loss": 0.48877090215682983 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4837808609008789, "epoch": 8.54, "learning_rate": 8.114961961115808e-06, "loss": 0.5318, "step": 10102, "task_loss": 0.9113153219223022 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.32818490266799927, "epoch": 8.54, "learning_rate": 8.110265802573496e-06, "loss": 0.6883, "step": 10103, "task_loss": 0.29340091347694397 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.38841214776039124, "epoch": 8.54, "learning_rate": 8.105569644031182e-06, "loss": 0.5422, "step": 10104, "task_loss": 0.9321341514587402 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5602864027023315, "epoch": 8.54, "learning_rate": 8.10087348548887e-06, "loss": 0.5682, "step": 10105, "task_loss": 0.8022961020469666 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.25351881980895996, "epoch": 8.54, "learning_rate": 8.096177326946558e-06, "loss": 0.5816, "step": 10106, "task_loss": 0.11937905102968216 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7187318801879883, "epoch": 8.54, "learning_rate": 8.091481168404246e-06, "loss": 0.7374, "step": 10107, "task_loss": 0.9248560667037964 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.833742618560791, "epoch": 8.54, "learning_rate": 8.086785009861933e-06, "loss": 0.4526, "step": 10108, "task_loss": 0.6479013562202454 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6749410033226013, "epoch": 8.54, "learning_rate": 8.08208885131962e-06, "loss": 0.6015, "step": 10109, "task_loss": 0.16060172021389008 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.518153727054596, "epoch": 8.55, "learning_rate": 8.077392692777309e-06, "loss": 0.6341, "step": 10110, "task_loss": 0.3851993680000305 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.26090601086616516, "epoch": 8.55, "learning_rate": 8.072696534234995e-06, "loss": 0.441, "step": 10111, "task_loss": 0.5293717980384827 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5353056192398071, "epoch": 8.55, "learning_rate": 8.068000375692685e-06, "loss": 0.4437, "step": 10112, "task_loss": 0.6285352110862732 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4880635142326355, "epoch": 8.55, "learning_rate": 8.063304217150373e-06, "loss": 0.5068, "step": 10113, "task_loss": 0.9870822429656982 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5263556241989136, "epoch": 8.55, "learning_rate": 8.058608058608059e-06, "loss": 0.6587, "step": 10114, "task_loss": 1.6000876426696777 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7022629976272583, "epoch": 8.55, "learning_rate": 8.053911900065747e-06, "loss": 0.5666, "step": 10115, "task_loss": 0.804175078868866 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5355299711227417, "epoch": 8.55, "learning_rate": 8.049215741523435e-06, "loss": 0.6895, "step": 10116, "task_loss": 1.548316478729248 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.41749125719070435, "epoch": 8.55, "learning_rate": 8.044519582981122e-06, "loss": 0.6628, "step": 10117, "task_loss": 2.3394904136657715 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.37052983045578003, "epoch": 8.55, "learning_rate": 8.03982342443881e-06, "loss": 0.4131, "step": 10118, "task_loss": 0.9963799715042114 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.45209747552871704, "epoch": 8.55, "learning_rate": 8.035127265896498e-06, "loss": 0.5505, "step": 10119, "task_loss": 0.5448580980300903 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.28125521540641785, "epoch": 8.55, "learning_rate": 8.030431107354184e-06, "loss": 0.4333, "step": 10120, "task_loss": 0.7575242519378662 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5642576813697815, "epoch": 8.56, "learning_rate": 8.025734948811872e-06, "loss": 0.6924, "step": 10121, "task_loss": 0.7886309623718262 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4827713966369629, "epoch": 8.56, "learning_rate": 8.02103879026956e-06, "loss": 0.4791, "step": 10122, "task_loss": 0.13242892920970917 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.49208471179008484, "epoch": 8.56, "learning_rate": 8.016342631727246e-06, "loss": 0.5081, "step": 10123, "task_loss": 0.7585548758506775 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5045320987701416, "epoch": 8.56, "learning_rate": 8.011646473184934e-06, "loss": 0.4392, "step": 10124, "task_loss": 0.18711814284324646 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.27482688426971436, "epoch": 8.56, "learning_rate": 8.006950314642622e-06, "loss": 0.506, "step": 10125, "task_loss": 0.0407571904361248 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6644451022148132, "epoch": 8.56, "learning_rate": 8.00225415610031e-06, "loss": 0.5756, "step": 10126, "task_loss": 0.9353916049003601 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.2864559590816498, "epoch": 8.56, "learning_rate": 7.997557997557997e-06, "loss": 0.4937, "step": 10127, "task_loss": 0.23424577713012695 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.329814612865448, "epoch": 8.56, "learning_rate": 7.992861839015687e-06, "loss": 0.4879, "step": 10128, "task_loss": 0.12384038418531418 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.3870355188846588, "epoch": 8.56, "learning_rate": 7.988165680473373e-06, "loss": 0.509, "step": 10129, "task_loss": 0.6031726598739624 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.617840588092804, "epoch": 8.56, "learning_rate": 7.983469521931061e-06, "loss": 0.6792, "step": 10130, "task_loss": 0.475353479385376 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.49035799503326416, "epoch": 8.56, "learning_rate": 7.978773363388749e-06, "loss": 0.5407, "step": 10131, "task_loss": 0.2951951324939728 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7206923961639404, "epoch": 8.56, "learning_rate": 7.974077204846437e-06, "loss": 0.5074, "step": 10132, "task_loss": 1.7934777736663818 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7381411790847778, "epoch": 8.57, "learning_rate": 7.969381046304123e-06, "loss": 0.561, "step": 10133, "task_loss": 0.8269683718681335 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.46145227551460266, "epoch": 8.57, "learning_rate": 7.964684887761811e-06, "loss": 0.6692, "step": 10134, "task_loss": 0.7193881273269653 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6436439156532288, "epoch": 8.57, "learning_rate": 7.9599887292195e-06, "loss": 0.5558, "step": 10135, "task_loss": 0.4468909204006195 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.42139285802841187, "epoch": 8.57, "learning_rate": 7.955292570677186e-06, "loss": 0.5849, "step": 10136, "task_loss": 0.8121824860572815 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.20668308436870575, "epoch": 8.57, "learning_rate": 7.950596412134874e-06, "loss": 0.4875, "step": 10137, "task_loss": 0.1131717711687088 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6257491111755371, "epoch": 8.57, "learning_rate": 7.945900253592562e-06, "loss": 0.4826, "step": 10138, "task_loss": 0.3529435396194458 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5894378423690796, "epoch": 8.57, "learning_rate": 7.941204095050248e-06, "loss": 0.6008, "step": 10139, "task_loss": 0.9032185077667236 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.3426850438117981, "epoch": 8.57, "learning_rate": 7.936507936507936e-06, "loss": 0.6847, "step": 10140, "task_loss": 0.4977544844150543 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4478592574596405, "epoch": 8.57, "learning_rate": 7.931811777965624e-06, "loss": 0.4915, "step": 10141, "task_loss": 0.621381938457489 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4900103807449341, "epoch": 8.57, "learning_rate": 7.927115619423312e-06, "loss": 0.3349, "step": 10142, "task_loss": 0.5146083235740662 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.579024076461792, "epoch": 8.57, "learning_rate": 7.922419460881e-06, "loss": 0.5346, "step": 10143, "task_loss": 0.6735717058181763 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.3790475130081177, "epoch": 8.57, "learning_rate": 7.917723302338688e-06, "loss": 0.4871, "step": 10144, "task_loss": 0.6251894235610962 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5460718870162964, "epoch": 8.58, "learning_rate": 7.913027143796375e-06, "loss": 0.5633, "step": 10145, "task_loss": 0.3767307996749878 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.2993364930152893, "epoch": 8.58, "learning_rate": 7.908330985254063e-06, "loss": 0.4737, "step": 10146, "task_loss": 0.21420060098171234 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.9127258062362671, "epoch": 8.58, "learning_rate": 7.90363482671175e-06, "loss": 0.4906, "step": 10147, "task_loss": 0.9015702605247498 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.42848777770996094, "epoch": 8.58, "learning_rate": 7.898938668169439e-06, "loss": 0.6728, "step": 10148, "task_loss": 0.7727674245834351 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.3619275987148285, "epoch": 8.58, "learning_rate": 7.894242509627125e-06, "loss": 0.5521, "step": 10149, "task_loss": 0.8505379557609558 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.8534202575683594, "epoch": 8.58, "learning_rate": 7.889546351084813e-06, "loss": 0.5172, "step": 10150, "task_loss": 1.1356754302978516 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.675075113773346, "epoch": 8.58, "learning_rate": 7.884850192542501e-06, "loss": 0.6709, "step": 10151, "task_loss": 0.39349278807640076 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5910518765449524, "epoch": 8.58, "learning_rate": 7.880154034000187e-06, "loss": 0.5614, "step": 10152, "task_loss": 0.5043654441833496 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6680186986923218, "epoch": 8.58, "learning_rate": 7.875457875457876e-06, "loss": 0.7377, "step": 10153, "task_loss": 1.233729600906372 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4576263427734375, "epoch": 8.58, "learning_rate": 7.870761716915564e-06, "loss": 0.5735, "step": 10154, "task_loss": 0.28425025939941406 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6808176040649414, "epoch": 8.58, "learning_rate": 7.86606555837325e-06, "loss": 0.7476, "step": 10155, "task_loss": 0.7247397303581238 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.43139755725860596, "epoch": 8.58, "learning_rate": 7.861369399830938e-06, "loss": 0.502, "step": 10156, "task_loss": 0.44355565309524536 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4331018328666687, "epoch": 8.59, "learning_rate": 7.856673241288626e-06, "loss": 0.612, "step": 10157, "task_loss": 0.12340562045574188 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4998604655265808, "epoch": 8.59, "learning_rate": 7.851977082746314e-06, "loss": 0.5681, "step": 10158, "task_loss": 0.44427070021629333 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.3837619721889496, "epoch": 8.59, "learning_rate": 7.847280924204002e-06, "loss": 0.5324, "step": 10159, "task_loss": 0.5479989647865295 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.38312530517578125, "epoch": 8.59, "learning_rate": 7.84258476566169e-06, "loss": 0.488, "step": 10160, "task_loss": 0.4153583347797394 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5629072785377502, "epoch": 8.59, "learning_rate": 7.837888607119376e-06, "loss": 0.5462, "step": 10161, "task_loss": 0.45182278752326965 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6474666595458984, "epoch": 8.59, "learning_rate": 7.833192448577064e-06, "loss": 0.6333, "step": 10162, "task_loss": 1.2355743646621704 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.42465245723724365, "epoch": 8.59, "learning_rate": 7.828496290034752e-06, "loss": 0.5568, "step": 10163, "task_loss": 0.9515724778175354 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6284161806106567, "epoch": 8.59, "learning_rate": 7.82380013149244e-06, "loss": 0.5225, "step": 10164, "task_loss": 0.5514670610427856 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6502808332443237, "epoch": 8.59, "learning_rate": 7.819103972950127e-06, "loss": 0.533, "step": 10165, "task_loss": 0.7140481472015381 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.3145908713340759, "epoch": 8.59, "learning_rate": 7.814407814407815e-06, "loss": 0.4809, "step": 10166, "task_loss": 0.25902023911476135 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.43434396386146545, "epoch": 8.59, "learning_rate": 7.809711655865503e-06, "loss": 0.434, "step": 10167, "task_loss": 0.42399272322654724 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.34219855070114136, "epoch": 8.59, "learning_rate": 7.80501549732319e-06, "loss": 0.4421, "step": 10168, "task_loss": 0.46535056829452515 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6890006065368652, "epoch": 8.6, "learning_rate": 7.800319338780877e-06, "loss": 0.6663, "step": 10169, "task_loss": 0.7872116565704346 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6199469566345215, "epoch": 8.6, "learning_rate": 7.795623180238565e-06, "loss": 0.6704, "step": 10170, "task_loss": 1.1878571510314941 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5149555206298828, "epoch": 8.6, "learning_rate": 7.790927021696252e-06, "loss": 0.5896, "step": 10171, "task_loss": 0.4224426746368408 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.526591420173645, "epoch": 8.6, "learning_rate": 7.78623086315394e-06, "loss": 0.5096, "step": 10172, "task_loss": 0.5713741779327393 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.531842052936554, "epoch": 8.6, "learning_rate": 7.781534704611628e-06, "loss": 0.6403, "step": 10173, "task_loss": 0.27138659358024597 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5089507699012756, "epoch": 8.6, "learning_rate": 7.776838546069316e-06, "loss": 0.6328, "step": 10174, "task_loss": 1.2283568382263184 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.28154557943344116, "epoch": 8.6, "learning_rate": 7.772142387527004e-06, "loss": 0.4644, "step": 10175, "task_loss": 0.23503310978412628 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.691821277141571, "epoch": 8.6, "learning_rate": 7.767446228984692e-06, "loss": 0.6169, "step": 10176, "task_loss": 0.7231013178825378 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.8408421277999878, "epoch": 8.6, "learning_rate": 7.762750070442378e-06, "loss": 0.6201, "step": 10177, "task_loss": 0.5247149467468262 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4279100000858307, "epoch": 8.6, "learning_rate": 7.758053911900066e-06, "loss": 0.5743, "step": 10178, "task_loss": 0.8061252236366272 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4396101236343384, "epoch": 8.6, "learning_rate": 7.753357753357754e-06, "loss": 0.5312, "step": 10179, "task_loss": 0.214432954788208 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.34010961651802063, "epoch": 8.6, "learning_rate": 7.748661594815442e-06, "loss": 0.6092, "step": 10180, "task_loss": 0.014869332313537598 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.8323996067047119, "epoch": 8.61, "learning_rate": 7.743965436273129e-06, "loss": 0.712, "step": 10181, "task_loss": 0.7657367587089539 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6642820835113525, "epoch": 8.61, "learning_rate": 7.739269277730817e-06, "loss": 0.6302, "step": 10182, "task_loss": 0.9210270643234253 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.3710958957672119, "epoch": 8.61, "learning_rate": 7.734573119188505e-06, "loss": 0.3666, "step": 10183, "task_loss": 0.7601845264434814 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.48884764313697815, "epoch": 8.61, "learning_rate": 7.729876960646191e-06, "loss": 0.6154, "step": 10184, "task_loss": 0.5419313311576843 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6243571639060974, "epoch": 8.61, "learning_rate": 7.725180802103879e-06, "loss": 0.5249, "step": 10185, "task_loss": 1.172163724899292 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5880721807479858, "epoch": 8.61, "learning_rate": 7.720484643561567e-06, "loss": 0.4962, "step": 10186, "task_loss": 0.8736922740936279 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.40281474590301514, "epoch": 8.61, "learning_rate": 7.715788485019253e-06, "loss": 0.5119, "step": 10187, "task_loss": 0.9788747429847717 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4510864019393921, "epoch": 8.61, "learning_rate": 7.711092326476941e-06, "loss": 0.5376, "step": 10188, "task_loss": 0.886990487575531 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6114735007286072, "epoch": 8.61, "learning_rate": 7.70639616793463e-06, "loss": 0.5547, "step": 10189, "task_loss": 0.23636624217033386 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.405250608921051, "epoch": 8.61, "learning_rate": 7.701700009392318e-06, "loss": 0.5171, "step": 10190, "task_loss": 1.4366767406463623 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5506914258003235, "epoch": 8.61, "learning_rate": 7.697003850850006e-06, "loss": 0.4742, "step": 10191, "task_loss": 0.33670493960380554 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.45559728145599365, "epoch": 8.61, "learning_rate": 7.692307692307694e-06, "loss": 0.5543, "step": 10192, "task_loss": 0.48883262276649475 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6641194224357605, "epoch": 8.62, "learning_rate": 7.68761153376538e-06, "loss": 0.5918, "step": 10193, "task_loss": 0.6632692217826843 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.46645957231521606, "epoch": 8.62, "learning_rate": 7.682915375223068e-06, "loss": 0.5451, "step": 10194, "task_loss": 0.8866949081420898 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4706856608390808, "epoch": 8.62, "learning_rate": 7.678219216680756e-06, "loss": 0.6006, "step": 10195, "task_loss": 1.0360324382781982 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4576830267906189, "epoch": 8.62, "learning_rate": 7.673523058138444e-06, "loss": 0.6444, "step": 10196, "task_loss": 0.6196795701980591 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.41327643394470215, "epoch": 8.62, "learning_rate": 7.66882689959613e-06, "loss": 0.6056, "step": 10197, "task_loss": 0.19692403078079224 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.48437443375587463, "epoch": 8.62, "learning_rate": 7.664130741053818e-06, "loss": 0.4912, "step": 10198, "task_loss": 0.5671310424804688 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4106687307357788, "epoch": 8.62, "learning_rate": 7.659434582511506e-06, "loss": 0.5622, "step": 10199, "task_loss": 0.5250669121742249 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4318590760231018, "epoch": 8.62, "learning_rate": 7.654738423969193e-06, "loss": 0.6081, "step": 10200, "task_loss": 1.0596551895141602 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.504088282585144, "epoch": 8.62, "learning_rate": 7.65004226542688e-06, "loss": 0.6584, "step": 10201, "task_loss": 0.4361553490161896 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5513283014297485, "epoch": 8.62, "learning_rate": 7.645346106884569e-06, "loss": 0.6109, "step": 10202, "task_loss": 1.122445821762085 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5457226037979126, "epoch": 8.62, "learning_rate": 7.640649948342255e-06, "loss": 0.6439, "step": 10203, "task_loss": 0.7230810523033142 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6957589387893677, "epoch": 8.63, "learning_rate": 7.635953789799943e-06, "loss": 0.5725, "step": 10204, "task_loss": 1.2208600044250488 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.47404998540878296, "epoch": 8.63, "learning_rate": 7.631257631257633e-06, "loss": 0.655, "step": 10205, "task_loss": 0.7040433287620544 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.41922643780708313, "epoch": 8.63, "learning_rate": 7.62656147271532e-06, "loss": 0.5357, "step": 10206, "task_loss": 1.1510061025619507 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.3961447775363922, "epoch": 8.63, "learning_rate": 7.621865314173007e-06, "loss": 0.6358, "step": 10207, "task_loss": 0.3043938875198364 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.8081384897232056, "epoch": 8.63, "learning_rate": 7.6171691556306945e-06, "loss": 0.6044, "step": 10208, "task_loss": 0.9309658408164978 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5744142532348633, "epoch": 8.63, "learning_rate": 7.6124729970883825e-06, "loss": 0.4515, "step": 10209, "task_loss": 1.6275269985198975 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.626166820526123, "epoch": 8.63, "learning_rate": 7.60777683854607e-06, "loss": 0.5905, "step": 10210, "task_loss": 0.8756242990493774 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4888893961906433, "epoch": 8.63, "learning_rate": 7.603080680003758e-06, "loss": 0.4547, "step": 10211, "task_loss": 1.5373225212097168 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.46328017115592957, "epoch": 8.63, "learning_rate": 7.598384521461445e-06, "loss": 0.6491, "step": 10212, "task_loss": 0.7292001247406006 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5929757356643677, "epoch": 8.63, "learning_rate": 7.593688362919132e-06, "loss": 0.6086, "step": 10213, "task_loss": 0.9026742577552795 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6763771176338196, "epoch": 8.63, "learning_rate": 7.58899220437682e-06, "loss": 0.5799, "step": 10214, "task_loss": 0.40101462602615356 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.9702361822128296, "epoch": 8.63, "learning_rate": 7.584296045834507e-06, "loss": 0.7785, "step": 10215, "task_loss": 1.3708608150482178 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7873521447181702, "epoch": 8.64, "learning_rate": 7.579599887292195e-06, "loss": 0.6442, "step": 10216, "task_loss": 1.1339277029037476 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.548207700252533, "epoch": 8.64, "learning_rate": 7.5749037287498826e-06, "loss": 0.4793, "step": 10217, "task_loss": 0.3917886018753052 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.49768316745758057, "epoch": 8.64, "learning_rate": 7.57020757020757e-06, "loss": 0.6862, "step": 10218, "task_loss": 0.9330788254737854 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4053627848625183, "epoch": 8.64, "learning_rate": 7.565511411665258e-06, "loss": 0.4753, "step": 10219, "task_loss": 0.6663283705711365 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6307308673858643, "epoch": 8.64, "learning_rate": 7.560815253122947e-06, "loss": 0.5704, "step": 10220, "task_loss": 1.2533655166625977 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7778764963150024, "epoch": 8.64, "learning_rate": 7.556119094580634e-06, "loss": 0.5699, "step": 10221, "task_loss": 1.328901767730713 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.41368865966796875, "epoch": 8.64, "learning_rate": 7.551422936038322e-06, "loss": 0.4238, "step": 10222, "task_loss": 0.8493715524673462 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.3119378089904785, "epoch": 8.64, "learning_rate": 7.546726777496009e-06, "loss": 0.5276, "step": 10223, "task_loss": 0.5483587384223938 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.43674248456954956, "epoch": 8.64, "learning_rate": 7.542030618953696e-06, "loss": 0.5052, "step": 10224, "task_loss": 0.14912505447864532 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.3352706730365753, "epoch": 8.64, "learning_rate": 7.537334460411384e-06, "loss": 0.5695, "step": 10225, "task_loss": 0.3992350399494171 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6370732188224792, "epoch": 8.64, "learning_rate": 7.5326383018690715e-06, "loss": 0.6808, "step": 10226, "task_loss": 0.11418670415878296 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.47148245573043823, "epoch": 8.64, "learning_rate": 7.5279421433267595e-06, "loss": 0.5324, "step": 10227, "task_loss": 1.2116204500198364 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6872268915176392, "epoch": 8.65, "learning_rate": 7.523245984784447e-06, "loss": 0.5556, "step": 10228, "task_loss": 0.47657063603401184 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.42959070205688477, "epoch": 8.65, "learning_rate": 7.518549826242134e-06, "loss": 0.6317, "step": 10229, "task_loss": 0.5774355530738831 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6065131425857544, "epoch": 8.65, "learning_rate": 7.513853667699822e-06, "loss": 0.6498, "step": 10230, "task_loss": 1.0525391101837158 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.3681030869483948, "epoch": 8.65, "learning_rate": 7.509157509157509e-06, "loss": 0.561, "step": 10231, "task_loss": 0.3069378137588501 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.3902667164802551, "epoch": 8.65, "learning_rate": 7.504461350615197e-06, "loss": 0.5837, "step": 10232, "task_loss": 0.3778567910194397 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.8332396149635315, "epoch": 8.65, "learning_rate": 7.499765192072884e-06, "loss": 0.4939, "step": 10233, "task_loss": 0.9043023586273193 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4237216114997864, "epoch": 8.65, "learning_rate": 7.4950690335305715e-06, "loss": 0.6901, "step": 10234, "task_loss": 0.6827937364578247 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6342553496360779, "epoch": 8.65, "learning_rate": 7.4903728749882596e-06, "loss": 0.6429, "step": 10235, "task_loss": 1.0258148908615112 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.39313584566116333, "epoch": 8.65, "learning_rate": 7.4856767164459484e-06, "loss": 0.4252, "step": 10236, "task_loss": 1.0365196466445923 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.25762152671813965, "epoch": 8.65, "learning_rate": 7.480980557903636e-06, "loss": 0.4732, "step": 10237, "task_loss": 0.3036763370037079 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.8248552680015564, "epoch": 8.65, "learning_rate": 7.476284399361324e-06, "loss": 0.6466, "step": 10238, "task_loss": 0.27214887738227844 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.8097308874130249, "epoch": 8.65, "learning_rate": 7.471588240819011e-06, "loss": 0.5922, "step": 10239, "task_loss": 0.7543125748634338 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.3951740562915802, "epoch": 8.66, "learning_rate": 7.466892082276698e-06, "loss": 0.5256, "step": 10240, "task_loss": 1.6051669120788574 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.37878715991973877, "epoch": 8.66, "learning_rate": 7.462195923734386e-06, "loss": 0.5728, "step": 10241, "task_loss": 0.06755638867616653 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7129075527191162, "epoch": 8.66, "learning_rate": 7.457499765192073e-06, "loss": 0.5835, "step": 10242, "task_loss": 0.6745591759681702 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6269041299819946, "epoch": 8.66, "learning_rate": 7.4528036066497604e-06, "loss": 0.5542, "step": 10243, "task_loss": 0.3346305787563324 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.1056039333343506, "epoch": 8.66, "learning_rate": 7.4481074481074485e-06, "loss": 0.8226, "step": 10244, "task_loss": 1.3281147480010986 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5918039083480835, "epoch": 8.66, "learning_rate": 7.443411289565136e-06, "loss": 0.5547, "step": 10245, "task_loss": 0.7364872097969055 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4488946795463562, "epoch": 8.66, "learning_rate": 7.438715131022824e-06, "loss": 0.5398, "step": 10246, "task_loss": 0.5460629463195801 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.45832908153533936, "epoch": 8.66, "learning_rate": 7.434018972480511e-06, "loss": 0.6335, "step": 10247, "task_loss": 1.8058630228042603 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6539885997772217, "epoch": 8.66, "learning_rate": 7.429322813938198e-06, "loss": 0.6427, "step": 10248, "task_loss": 1.0286424160003662 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.3697872459888458, "epoch": 8.66, "learning_rate": 7.424626655395886e-06, "loss": 0.4917, "step": 10249, "task_loss": 0.12179650366306305 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5173530578613281, "epoch": 8.66, "learning_rate": 7.419930496853573e-06, "loss": 0.6341, "step": 10250, "task_loss": 1.1463367938995361 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4224802553653717, "epoch": 8.66, "learning_rate": 7.415234338311262e-06, "loss": 0.6286, "step": 10251, "task_loss": 0.13439907133579254 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6051065921783447, "epoch": 8.67, "learning_rate": 7.41053817976895e-06, "loss": 0.69, "step": 10252, "task_loss": 0.7341217994689941 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.409909725189209, "epoch": 8.67, "learning_rate": 7.405842021226637e-06, "loss": 0.5283, "step": 10253, "task_loss": 1.1080448627471924 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5361477136611938, "epoch": 8.67, "learning_rate": 7.4011458626843246e-06, "loss": 0.5483, "step": 10254, "task_loss": 0.6561297178268433 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.8686103820800781, "epoch": 8.67, "learning_rate": 7.396449704142013e-06, "loss": 0.5504, "step": 10255, "task_loss": 1.025373101234436 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.565653920173645, "epoch": 8.67, "learning_rate": 7.3917535455997e-06, "loss": 0.519, "step": 10256, "task_loss": 0.6099762916564941 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.41845130920410156, "epoch": 8.67, "learning_rate": 7.387057387057388e-06, "loss": 0.5508, "step": 10257, "task_loss": 0.6322251558303833 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.503254771232605, "epoch": 8.67, "learning_rate": 7.382361228515075e-06, "loss": 0.5712, "step": 10258, "task_loss": 0.5466342568397522 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.42690950632095337, "epoch": 8.67, "learning_rate": 7.377665069972762e-06, "loss": 0.4372, "step": 10259, "task_loss": 0.34407803416252136 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.47467920184135437, "epoch": 8.67, "learning_rate": 7.37296891143045e-06, "loss": 0.5751, "step": 10260, "task_loss": 0.5128033757209778 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7595347166061401, "epoch": 8.67, "learning_rate": 7.3682727528881374e-06, "loss": 0.565, "step": 10261, "task_loss": 1.2179811000823975 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6114000678062439, "epoch": 8.67, "learning_rate": 7.3635765943458255e-06, "loss": 0.5584, "step": 10262, "task_loss": 0.29443392157554626 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4346359074115753, "epoch": 8.67, "learning_rate": 7.358880435803513e-06, "loss": 0.4824, "step": 10263, "task_loss": 0.5224870443344116 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5443424582481384, "epoch": 8.68, "learning_rate": 7.3541842772612e-06, "loss": 0.4592, "step": 10264, "task_loss": 0.7487624287605286 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.8096593618392944, "epoch": 8.68, "learning_rate": 7.349488118718888e-06, "loss": 0.617, "step": 10265, "task_loss": 1.1723335981369019 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6420505046844482, "epoch": 8.68, "learning_rate": 7.344791960176575e-06, "loss": 0.503, "step": 10266, "task_loss": 1.2138831615447998 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.8059666156768799, "epoch": 8.68, "learning_rate": 7.340095801634264e-06, "loss": 0.6517, "step": 10267, "task_loss": 0.8919260501861572 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.658506453037262, "epoch": 8.68, "learning_rate": 7.335399643091952e-06, "loss": 0.5936, "step": 10268, "task_loss": 1.6535675525665283 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.3891355097293854, "epoch": 8.68, "learning_rate": 7.330703484549639e-06, "loss": 0.5521, "step": 10269, "task_loss": 0.33551591634750366 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.8669257164001465, "epoch": 8.68, "learning_rate": 7.326007326007326e-06, "loss": 0.5193, "step": 10270, "task_loss": 0.8533660173416138 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.47761911153793335, "epoch": 8.68, "learning_rate": 7.321311167465014e-06, "loss": 0.4264, "step": 10271, "task_loss": 0.8564295172691345 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.3078145980834961, "epoch": 8.68, "learning_rate": 7.3166150089227016e-06, "loss": 0.399, "step": 10272, "task_loss": 0.7565029263496399 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4960970878601074, "epoch": 8.68, "learning_rate": 7.31191885038039e-06, "loss": 0.5368, "step": 10273, "task_loss": 2.1294572353363037 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7660937905311584, "epoch": 8.68, "learning_rate": 7.307222691838077e-06, "loss": 0.524, "step": 10274, "task_loss": 0.4152833819389343 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.3508826494216919, "epoch": 8.69, "learning_rate": 7.302526533295764e-06, "loss": 0.6549, "step": 10275, "task_loss": 0.34853848814964294 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.792838454246521, "epoch": 8.69, "learning_rate": 7.297830374753452e-06, "loss": 0.4894, "step": 10276, "task_loss": 0.437909871339798 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7205517292022705, "epoch": 8.69, "learning_rate": 7.293134216211139e-06, "loss": 0.5773, "step": 10277, "task_loss": 1.1230223178863525 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.787097692489624, "epoch": 8.69, "learning_rate": 7.288438057668827e-06, "loss": 0.5862, "step": 10278, "task_loss": 1.151388168334961 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4683302640914917, "epoch": 8.69, "learning_rate": 7.283741899126514e-06, "loss": 0.4567, "step": 10279, "task_loss": 0.6024013757705688 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7732101082801819, "epoch": 8.69, "learning_rate": 7.279045740584202e-06, "loss": 0.7833, "step": 10280, "task_loss": 2.0467896461486816 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.2833815813064575, "epoch": 8.69, "learning_rate": 7.27434958204189e-06, "loss": 0.8617, "step": 10281, "task_loss": 1.1665875911712646 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.563601016998291, "epoch": 8.69, "learning_rate": 7.2696534234995785e-06, "loss": 0.5302, "step": 10282, "task_loss": 1.1753880977630615 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.3792545199394226, "epoch": 8.69, "learning_rate": 7.264957264957266e-06, "loss": 0.526, "step": 10283, "task_loss": 0.45275241136550903 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6064555644989014, "epoch": 8.69, "learning_rate": 7.260261106414954e-06, "loss": 0.4654, "step": 10284, "task_loss": 0.3920440077781677 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.43233346939086914, "epoch": 8.69, "learning_rate": 7.255564947872641e-06, "loss": 0.5527, "step": 10285, "task_loss": 0.6501238942146301 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5199381113052368, "epoch": 8.69, "learning_rate": 7.250868789330328e-06, "loss": 0.501, "step": 10286, "task_loss": 0.22399379312992096 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.504426896572113, "epoch": 8.7, "learning_rate": 7.246172630788016e-06, "loss": 0.6557, "step": 10287, "task_loss": 0.46140217781066895 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.41826102137565613, "epoch": 8.7, "learning_rate": 7.241476472245703e-06, "loss": 0.5942, "step": 10288, "task_loss": 0.9990370869636536 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6491661071777344, "epoch": 8.7, "learning_rate": 7.236780313703391e-06, "loss": 0.6584, "step": 10289, "task_loss": 0.7214502096176147 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.38574081659317017, "epoch": 8.7, "learning_rate": 7.2320841551610785e-06, "loss": 0.4687, "step": 10290, "task_loss": 0.6558793187141418 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4913393259048462, "epoch": 8.7, "learning_rate": 7.227387996618766e-06, "loss": 0.4222, "step": 10291, "task_loss": 0.8690575361251831 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.621860921382904, "epoch": 8.7, "learning_rate": 7.222691838076454e-06, "loss": 0.7407, "step": 10292, "task_loss": 1.5254828929901123 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.42471474409103394, "epoch": 8.7, "learning_rate": 7.217995679534141e-06, "loss": 0.588, "step": 10293, "task_loss": 0.5383074283599854 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.3305829167366028, "epoch": 8.7, "learning_rate": 7.213299520991829e-06, "loss": 0.7062, "step": 10294, "task_loss": 0.030561823397874832 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.2524256408214569, "epoch": 8.7, "learning_rate": 7.208603362449516e-06, "loss": 0.5451, "step": 10295, "task_loss": 0.12882976233959198 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.2741049826145172, "epoch": 8.7, "learning_rate": 7.203907203907203e-06, "loss": 0.537, "step": 10296, "task_loss": 0.6194612383842468 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6795684099197388, "epoch": 8.7, "learning_rate": 7.199211045364892e-06, "loss": 0.9079, "step": 10297, "task_loss": 0.9307163953781128 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.42480748891830444, "epoch": 8.7, "learning_rate": 7.19451488682258e-06, "loss": 0.5103, "step": 10298, "task_loss": 0.18790249526500702 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.8875046372413635, "epoch": 8.71, "learning_rate": 7.1898187282802675e-06, "loss": 0.678, "step": 10299, "task_loss": 0.4898264408111572 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.41284847259521484, "epoch": 8.71, "learning_rate": 7.1851225697379555e-06, "loss": 0.5192, "step": 10300, "task_loss": 0.6449827551841736 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6076819896697998, "epoch": 8.71, "learning_rate": 7.180426411195643e-06, "loss": 0.6992, "step": 10301, "task_loss": 0.40686729550361633 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.40607112646102905, "epoch": 8.71, "learning_rate": 7.17573025265333e-06, "loss": 0.5669, "step": 10302, "task_loss": 0.2284427136182785 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.2656305432319641, "epoch": 8.71, "learning_rate": 7.171034094111018e-06, "loss": 0.5542, "step": 10303, "task_loss": 0.3977203369140625 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.3871995806694031, "epoch": 8.71, "learning_rate": 7.166337935568705e-06, "loss": 0.4727, "step": 10304, "task_loss": 0.9074400663375854 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6045183539390564, "epoch": 8.71, "learning_rate": 7.161641777026393e-06, "loss": 0.6096, "step": 10305, "task_loss": 1.744801640510559 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4028138816356659, "epoch": 8.71, "learning_rate": 7.15694561848408e-06, "loss": 0.6847, "step": 10306, "task_loss": 0.9243627190589905 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5522833466529846, "epoch": 8.71, "learning_rate": 7.1522494599417675e-06, "loss": 0.7582, "step": 10307, "task_loss": 0.7845475077629089 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.32468879222869873, "epoch": 8.71, "learning_rate": 7.1475533013994555e-06, "loss": 0.6019, "step": 10308, "task_loss": 0.6948211789131165 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7046904563903809, "epoch": 8.71, "learning_rate": 7.142857142857143e-06, "loss": 0.5528, "step": 10309, "task_loss": 0.7529733180999756 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5453115701675415, "epoch": 8.71, "learning_rate": 7.13816098431483e-06, "loss": 0.5532, "step": 10310, "task_loss": 0.7622359991073608 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.3607589900493622, "epoch": 8.72, "learning_rate": 7.133464825772518e-06, "loss": 0.5302, "step": 10311, "task_loss": 0.46916672587394714 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6174506545066833, "epoch": 8.72, "learning_rate": 7.128768667230205e-06, "loss": 0.6919, "step": 10312, "task_loss": 0.5448486804962158 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6433306336402893, "epoch": 8.72, "learning_rate": 7.124072508687894e-06, "loss": 0.5014, "step": 10313, "task_loss": 0.49146610498428345 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.642281711101532, "epoch": 8.72, "learning_rate": 7.119376350145582e-06, "loss": 0.4231, "step": 10314, "task_loss": 0.27000492811203003 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5334990620613098, "epoch": 8.72, "learning_rate": 7.114680191603269e-06, "loss": 0.4966, "step": 10315, "task_loss": 1.384645938873291 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5433629155158997, "epoch": 8.72, "learning_rate": 7.109984033060957e-06, "loss": 0.4974, "step": 10316, "task_loss": 0.6724228262901306 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.3530283272266388, "epoch": 8.72, "learning_rate": 7.1052878745186444e-06, "loss": 0.5629, "step": 10317, "task_loss": 1.1564393043518066 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.8471308946609497, "epoch": 8.72, "learning_rate": 7.100591715976332e-06, "loss": 0.5103, "step": 10318, "task_loss": 0.8414833545684814 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4368123412132263, "epoch": 8.72, "learning_rate": 7.09589555743402e-06, "loss": 0.3904, "step": 10319, "task_loss": 0.6403210163116455 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4052436947822571, "epoch": 8.72, "learning_rate": 7.091199398891707e-06, "loss": 0.5497, "step": 10320, "task_loss": 0.9208522439002991 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.1501593589782715, "epoch": 8.72, "learning_rate": 7.086503240349394e-06, "loss": 0.7032, "step": 10321, "task_loss": 1.1602727174758911 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4892164468765259, "epoch": 8.72, "learning_rate": 7.081807081807082e-06, "loss": 0.5206, "step": 10322, "task_loss": 0.23344182968139648 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5994504690170288, "epoch": 8.73, "learning_rate": 7.077110923264769e-06, "loss": 0.6369, "step": 10323, "task_loss": 0.3932313621044159 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5704218149185181, "epoch": 8.73, "learning_rate": 7.072414764722457e-06, "loss": 0.5708, "step": 10324, "task_loss": 0.7841705083847046 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4839526414871216, "epoch": 8.73, "learning_rate": 7.0677186061801445e-06, "loss": 0.5433, "step": 10325, "task_loss": 0.36781391501426697 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.47805771231651306, "epoch": 8.73, "learning_rate": 7.063022447637832e-06, "loss": 0.8155, "step": 10326, "task_loss": 1.5911816358566284 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.9234185218811035, "epoch": 8.73, "learning_rate": 7.05832628909552e-06, "loss": 0.5984, "step": 10327, "task_loss": 1.1262027025222778 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4731239974498749, "epoch": 8.73, "learning_rate": 7.053630130553209e-06, "loss": 0.6217, "step": 10328, "task_loss": 0.8451111316680908 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7011232376098633, "epoch": 8.73, "learning_rate": 7.048933972010896e-06, "loss": 0.6255, "step": 10329, "task_loss": 0.9285666346549988 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7590755820274353, "epoch": 8.73, "learning_rate": 7.044237813468584e-06, "loss": 0.605, "step": 10330, "task_loss": 0.5850201845169067 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6208679676055908, "epoch": 8.73, "learning_rate": 7.039541654926271e-06, "loss": 0.6377, "step": 10331, "task_loss": 0.5315170884132385 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.39641034603118896, "epoch": 8.73, "learning_rate": 7.034845496383958e-06, "loss": 0.53, "step": 10332, "task_loss": 1.0273722410202026 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7928783893585205, "epoch": 8.73, "learning_rate": 7.030149337841646e-06, "loss": 0.5447, "step": 10333, "task_loss": 0.4759225845336914 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.3300149738788605, "epoch": 8.73, "learning_rate": 7.025453179299333e-06, "loss": 0.515, "step": 10334, "task_loss": 0.19636566936969757 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4446650445461273, "epoch": 8.74, "learning_rate": 7.0207570207570214e-06, "loss": 0.4545, "step": 10335, "task_loss": 0.1241639032959938 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5092456340789795, "epoch": 8.74, "learning_rate": 7.016060862214709e-06, "loss": 0.6787, "step": 10336, "task_loss": 1.131974697113037 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.9969494938850403, "epoch": 8.74, "learning_rate": 7.011364703672396e-06, "loss": 0.5936, "step": 10337, "task_loss": 1.2124367952346802 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7758007049560547, "epoch": 8.74, "learning_rate": 7.006668545130084e-06, "loss": 0.5903, "step": 10338, "task_loss": 1.1535673141479492 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.9242762923240662, "epoch": 8.74, "learning_rate": 7.001972386587771e-06, "loss": 0.7378, "step": 10339, "task_loss": 2.1080024242401123 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.1646398305892944, "epoch": 8.74, "learning_rate": 6.997276228045459e-06, "loss": 0.7864, "step": 10340, "task_loss": 1.020667314529419 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.48240146040916443, "epoch": 8.74, "learning_rate": 6.992580069503146e-06, "loss": 0.5039, "step": 10341, "task_loss": 0.801193118095398 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.546881914138794, "epoch": 8.74, "learning_rate": 6.9878839109608334e-06, "loss": 0.5739, "step": 10342, "task_loss": 0.7922036647796631 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5985745191574097, "epoch": 8.74, "learning_rate": 6.9831877524185215e-06, "loss": 0.5882, "step": 10343, "task_loss": 0.7803962230682373 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5321500897407532, "epoch": 8.74, "learning_rate": 6.97849159387621e-06, "loss": 0.5902, "step": 10344, "task_loss": 1.0890376567840576 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4115813672542572, "epoch": 8.74, "learning_rate": 6.9737954353338975e-06, "loss": 0.5625, "step": 10345, "task_loss": 0.5858675241470337 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5150871276855469, "epoch": 8.75, "learning_rate": 6.9690992767915856e-06, "loss": 0.5448, "step": 10346, "task_loss": 1.65718674659729 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6447421312332153, "epoch": 8.75, "learning_rate": 6.964403118249273e-06, "loss": 0.5909, "step": 10347, "task_loss": 0.6460264921188354 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5438698530197144, "epoch": 8.75, "learning_rate": 6.95970695970696e-06, "loss": 0.4946, "step": 10348, "task_loss": 0.27395099401474 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5077297687530518, "epoch": 8.75, "learning_rate": 6.955010801164648e-06, "loss": 0.6026, "step": 10349, "task_loss": 0.3998420536518097 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.2089540660381317, "epoch": 8.75, "learning_rate": 6.950314642622335e-06, "loss": 0.4381, "step": 10350, "task_loss": 0.23408906161785126 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5050946474075317, "epoch": 8.75, "learning_rate": 6.945618484080023e-06, "loss": 0.612, "step": 10351, "task_loss": 1.038474440574646 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6926410794258118, "epoch": 8.75, "learning_rate": 6.94092232553771e-06, "loss": 0.5846, "step": 10352, "task_loss": 0.8045667409896851 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7609493732452393, "epoch": 8.75, "learning_rate": 6.9362261669953976e-06, "loss": 0.7227, "step": 10353, "task_loss": 1.123343825340271 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7818836569786072, "epoch": 8.75, "learning_rate": 6.931530008453086e-06, "loss": 0.7368, "step": 10354, "task_loss": 0.962765634059906 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.9259731769561768, "epoch": 8.75, "learning_rate": 6.926833849910773e-06, "loss": 0.545, "step": 10355, "task_loss": 1.2098792791366577 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5749423503875732, "epoch": 8.75, "learning_rate": 6.922137691368461e-06, "loss": 0.647, "step": 10356, "task_loss": 0.8294190764427185 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5515408515930176, "epoch": 8.75, "learning_rate": 6.917441532826148e-06, "loss": 0.4932, "step": 10357, "task_loss": 0.49761950969696045 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.46829307079315186, "epoch": 8.76, "learning_rate": 6.912745374283835e-06, "loss": 0.5233, "step": 10358, "task_loss": 0.3312387466430664 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5920547246932983, "epoch": 8.76, "learning_rate": 6.908049215741524e-06, "loss": 0.6569, "step": 10359, "task_loss": 1.524283766746521 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.824539303779602, "epoch": 8.76, "learning_rate": 6.903353057199212e-06, "loss": 0.5025, "step": 10360, "task_loss": 1.139722466468811 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.440110445022583, "epoch": 8.76, "learning_rate": 6.898656898656899e-06, "loss": 0.6019, "step": 10361, "task_loss": 0.2952304482460022 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5744550824165344, "epoch": 8.76, "learning_rate": 6.893960740114587e-06, "loss": 0.5686, "step": 10362, "task_loss": 0.7930154800415039 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.8894174098968506, "epoch": 8.76, "learning_rate": 6.8892645815722745e-06, "loss": 0.6205, "step": 10363, "task_loss": 0.7520231008529663 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4098445773124695, "epoch": 8.76, "learning_rate": 6.884568423029962e-06, "loss": 0.5809, "step": 10364, "task_loss": 0.4273567497730255 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.47490426898002625, "epoch": 8.76, "learning_rate": 6.87987226448765e-06, "loss": 0.7179, "step": 10365, "task_loss": 0.40222540497779846 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.38343924283981323, "epoch": 8.76, "learning_rate": 6.875176105945337e-06, "loss": 0.4499, "step": 10366, "task_loss": 0.7117496132850647 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5507478713989258, "epoch": 8.76, "learning_rate": 6.870479947403025e-06, "loss": 0.5673, "step": 10367, "task_loss": 0.4095752239227295 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5391383171081543, "epoch": 8.76, "learning_rate": 6.865783788860712e-06, "loss": 0.7056, "step": 10368, "task_loss": 0.2525249421596527 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.003394603729248, "epoch": 8.76, "learning_rate": 6.861087630318399e-06, "loss": 0.7911, "step": 10369, "task_loss": 1.281363844871521 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.47802233695983887, "epoch": 8.77, "learning_rate": 6.856391471776087e-06, "loss": 0.4298, "step": 10370, "task_loss": 1.5152432918548584 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.24873724579811096, "epoch": 8.77, "learning_rate": 6.8516953132337745e-06, "loss": 0.5605, "step": 10371, "task_loss": 0.3715301752090454 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6261681914329529, "epoch": 8.77, "learning_rate": 6.846999154691463e-06, "loss": 0.475, "step": 10372, "task_loss": 0.27044540643692017 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5969523787498474, "epoch": 8.77, "learning_rate": 6.84230299614915e-06, "loss": 0.5063, "step": 10373, "task_loss": 0.7095768451690674 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4579583406448364, "epoch": 8.77, "learning_rate": 6.837606837606839e-06, "loss": 0.5751, "step": 10374, "task_loss": 0.27386367321014404 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7495284080505371, "epoch": 8.77, "learning_rate": 6.832910679064526e-06, "loss": 0.5947, "step": 10375, "task_loss": 0.4731720983982086 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5301280617713928, "epoch": 8.77, "learning_rate": 6.828214520522214e-06, "loss": 0.4248, "step": 10376, "task_loss": 0.32319796085357666 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.29669851064682007, "epoch": 8.77, "learning_rate": 6.823518361979901e-06, "loss": 0.4883, "step": 10377, "task_loss": 0.4422501027584076 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.23853106796741486, "epoch": 8.77, "learning_rate": 6.818822203437589e-06, "loss": 0.4753, "step": 10378, "task_loss": 0.06537524610757828 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4235384166240692, "epoch": 8.77, "learning_rate": 6.814126044895276e-06, "loss": 0.5502, "step": 10379, "task_loss": 0.5021094083786011 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.43977999687194824, "epoch": 8.77, "learning_rate": 6.8094298863529635e-06, "loss": 0.5426, "step": 10380, "task_loss": 1.261482834815979 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4326688051223755, "epoch": 8.77, "learning_rate": 6.8047337278106515e-06, "loss": 0.5728, "step": 10381, "task_loss": 0.1569068729877472 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.2495833933353424, "epoch": 8.78, "learning_rate": 6.800037569268339e-06, "loss": 0.4239, "step": 10382, "task_loss": 0.8380419611930847 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.44397997856140137, "epoch": 8.78, "learning_rate": 6.795341410726027e-06, "loss": 0.612, "step": 10383, "task_loss": 0.8200160264968872 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6290370225906372, "epoch": 8.78, "learning_rate": 6.790645252183714e-06, "loss": 0.6999, "step": 10384, "task_loss": 0.29773586988449097 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4930514097213745, "epoch": 8.78, "learning_rate": 6.785949093641401e-06, "loss": 0.4861, "step": 10385, "task_loss": 0.42606353759765625 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6975401043891907, "epoch": 8.78, "learning_rate": 6.781252935099089e-06, "loss": 0.6302, "step": 10386, "task_loss": 0.9619995951652527 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.9207071661949158, "epoch": 8.78, "learning_rate": 6.776556776556776e-06, "loss": 0.5637, "step": 10387, "task_loss": 1.1698555946350098 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.2553858160972595, "epoch": 8.78, "learning_rate": 6.7718606180144635e-06, "loss": 0.4198, "step": 10388, "task_loss": 0.3329714834690094 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6454405784606934, "epoch": 8.78, "learning_rate": 6.7671644594721515e-06, "loss": 0.7077, "step": 10389, "task_loss": 1.0938903093338013 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.4060289859771729, "epoch": 8.78, "learning_rate": 6.76246830092984e-06, "loss": 0.8242, "step": 10390, "task_loss": 0.9555860757827759 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.3893490135669708, "epoch": 8.78, "learning_rate": 6.757772142387528e-06, "loss": 0.5632, "step": 10391, "task_loss": 0.7246307134628296 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5024700164794922, "epoch": 8.78, "learning_rate": 6.753075983845216e-06, "loss": 0.5813, "step": 10392, "task_loss": 0.7267103791236877 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7856003642082214, "epoch": 8.78, "learning_rate": 6.748379825302903e-06, "loss": 0.5567, "step": 10393, "task_loss": 1.0447009801864624 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5717740058898926, "epoch": 8.79, "learning_rate": 6.743683666760591e-06, "loss": 0.6829, "step": 10394, "task_loss": 0.6524147391319275 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.9063979983329773, "epoch": 8.79, "learning_rate": 6.738987508218278e-06, "loss": 0.5572, "step": 10395, "task_loss": 0.7100487351417542 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4303475022315979, "epoch": 8.79, "learning_rate": 6.734291349675965e-06, "loss": 0.4604, "step": 10396, "task_loss": 0.4483071267604828 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5285152196884155, "epoch": 8.79, "learning_rate": 6.729595191133653e-06, "loss": 0.8343, "step": 10397, "task_loss": 0.3597392737865448 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6513479948043823, "epoch": 8.79, "learning_rate": 6.7248990325913404e-06, "loss": 0.468, "step": 10398, "task_loss": 0.11653785407543182 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.2905402183532715, "epoch": 8.79, "learning_rate": 6.720202874049028e-06, "loss": 0.5471, "step": 10399, "task_loss": 0.5148707032203674 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6005892157554626, "epoch": 8.79, "learning_rate": 6.715506715506716e-06, "loss": 0.5781, "step": 10400, "task_loss": 1.1965886354446411 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5519025325775146, "epoch": 8.79, "learning_rate": 6.710810556964403e-06, "loss": 0.5018, "step": 10401, "task_loss": 0.628398597240448 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.3725884258747101, "epoch": 8.79, "learning_rate": 6.706114398422091e-06, "loss": 0.5824, "step": 10402, "task_loss": 0.3823162615299225 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.3396443724632263, "epoch": 8.79, "learning_rate": 6.701418239879778e-06, "loss": 0.4836, "step": 10403, "task_loss": 0.46902385354042053 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6300222873687744, "epoch": 8.79, "learning_rate": 6.696722081337465e-06, "loss": 0.6315, "step": 10404, "task_loss": 0.5616195201873779 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6996287107467651, "epoch": 8.79, "learning_rate": 6.692025922795154e-06, "loss": 0.6816, "step": 10405, "task_loss": 0.4466788172721863 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.42925891280174255, "epoch": 8.8, "learning_rate": 6.687329764252842e-06, "loss": 0.4206, "step": 10406, "task_loss": 1.5744917392730713 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5955492258071899, "epoch": 8.8, "learning_rate": 6.682633605710529e-06, "loss": 0.4532, "step": 10407, "task_loss": 0.784270167350769 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.3209541440010071, "epoch": 8.8, "learning_rate": 6.677937447168217e-06, "loss": 0.4188, "step": 10408, "task_loss": 0.6802537441253662 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6133323311805725, "epoch": 8.8, "learning_rate": 6.673241288625905e-06, "loss": 0.5299, "step": 10409, "task_loss": 0.21387360990047455 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4159771502017975, "epoch": 8.8, "learning_rate": 6.668545130083592e-06, "loss": 0.3602, "step": 10410, "task_loss": 0.21253615617752075 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7224434614181519, "epoch": 8.8, "learning_rate": 6.66384897154128e-06, "loss": 0.6049, "step": 10411, "task_loss": 1.0532963275909424 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6067714691162109, "epoch": 8.8, "learning_rate": 6.659152812998967e-06, "loss": 0.6459, "step": 10412, "task_loss": 1.8054485321044922 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5165995955467224, "epoch": 8.8, "learning_rate": 6.654456654456655e-06, "loss": 0.5703, "step": 10413, "task_loss": 0.8915274739265442 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5776345729827881, "epoch": 8.8, "learning_rate": 6.649760495914342e-06, "loss": 0.5018, "step": 10414, "task_loss": 0.7522205710411072 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6961765289306641, "epoch": 8.8, "learning_rate": 6.645064337372029e-06, "loss": 0.6906, "step": 10415, "task_loss": 1.2775108814239502 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4550372362136841, "epoch": 8.8, "learning_rate": 6.6403681788297174e-06, "loss": 0.5347, "step": 10416, "task_loss": 0.16290467977523804 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6905685663223267, "epoch": 8.81, "learning_rate": 6.635672020287405e-06, "loss": 0.6114, "step": 10417, "task_loss": 1.5535417795181274 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.75383061170578, "epoch": 8.81, "learning_rate": 6.630975861745093e-06, "loss": 0.6003, "step": 10418, "task_loss": 0.9565126895904541 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.49867987632751465, "epoch": 8.81, "learning_rate": 6.62627970320278e-06, "loss": 0.6153, "step": 10419, "task_loss": 0.46796485781669617 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4652766287326813, "epoch": 8.81, "learning_rate": 6.621583544660467e-06, "loss": 0.5645, "step": 10420, "task_loss": 1.2161763906478882 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6370488405227661, "epoch": 8.81, "learning_rate": 6.616887386118156e-06, "loss": 0.5869, "step": 10421, "task_loss": 0.8086147904396057 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.2697755694389343, "epoch": 8.81, "learning_rate": 6.612191227575844e-06, "loss": 0.5027, "step": 10422, "task_loss": 0.624340832233429 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.38525134325027466, "epoch": 8.81, "learning_rate": 6.607495069033531e-06, "loss": 0.6076, "step": 10423, "task_loss": 0.9852173924446106 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7979705333709717, "epoch": 8.81, "learning_rate": 6.602798910491219e-06, "loss": 0.5747, "step": 10424, "task_loss": 0.4745531976222992 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.3308601379394531, "epoch": 8.81, "learning_rate": 6.598102751948906e-06, "loss": 0.4769, "step": 10425, "task_loss": 0.5257015228271484 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.34645822644233704, "epoch": 8.81, "learning_rate": 6.5934065934065935e-06, "loss": 0.6759, "step": 10426, "task_loss": 0.26383987069129944 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6514652967453003, "epoch": 8.81, "learning_rate": 6.5887104348642816e-06, "loss": 0.5821, "step": 10427, "task_loss": 1.3307198286056519 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.512481689453125, "epoch": 8.81, "learning_rate": 6.584014276321969e-06, "loss": 0.4809, "step": 10428, "task_loss": 0.9226614832878113 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4696445167064667, "epoch": 8.82, "learning_rate": 6.579318117779657e-06, "loss": 0.4013, "step": 10429, "task_loss": 0.8072288632392883 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7221746444702148, "epoch": 8.82, "learning_rate": 6.574621959237344e-06, "loss": 0.6356, "step": 10430, "task_loss": 0.8518087863922119 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.3668014705181122, "epoch": 8.82, "learning_rate": 6.569925800695031e-06, "loss": 0.5083, "step": 10431, "task_loss": 1.0403099060058594 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4611468017101288, "epoch": 8.82, "learning_rate": 6.565229642152719e-06, "loss": 0.62, "step": 10432, "task_loss": 1.0703948736190796 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.39971137046813965, "epoch": 8.82, "learning_rate": 6.560533483610406e-06, "loss": 0.5758, "step": 10433, "task_loss": 0.391137033700943 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4856651723384857, "epoch": 8.82, "learning_rate": 6.555837325068094e-06, "loss": 0.4485, "step": 10434, "task_loss": 0.25058242678642273 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4118480384349823, "epoch": 8.82, "learning_rate": 6.551141166525782e-06, "loss": 0.5063, "step": 10435, "task_loss": 0.7992642521858215 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5740260481834412, "epoch": 8.82, "learning_rate": 6.5464450079834705e-06, "loss": 0.7054, "step": 10436, "task_loss": 0.5549280047416687 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.9405794739723206, "epoch": 8.82, "learning_rate": 6.541748849441158e-06, "loss": 0.6946, "step": 10437, "task_loss": 1.5863858461380005 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5440404415130615, "epoch": 8.82, "learning_rate": 6.537052690898846e-06, "loss": 0.54, "step": 10438, "task_loss": 1.5026124715805054 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7753065228462219, "epoch": 8.82, "learning_rate": 6.532356532356533e-06, "loss": 0.5878, "step": 10439, "task_loss": 0.5904995203018188 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.1920926570892334, "epoch": 8.82, "learning_rate": 6.527660373814221e-06, "loss": 0.7118, "step": 10440, "task_loss": 1.282610297203064 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5998774766921997, "epoch": 8.83, "learning_rate": 6.522964215271908e-06, "loss": 0.5666, "step": 10441, "task_loss": 0.7203288078308105 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.556732177734375, "epoch": 8.83, "learning_rate": 6.518268056729595e-06, "loss": 0.6738, "step": 10442, "task_loss": 0.6162696480751038 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5914292335510254, "epoch": 8.83, "learning_rate": 6.513571898187283e-06, "loss": 0.4567, "step": 10443, "task_loss": 1.6884870529174805 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6201949119567871, "epoch": 8.83, "learning_rate": 6.5088757396449705e-06, "loss": 0.6272, "step": 10444, "task_loss": 0.8216149806976318 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6271955966949463, "epoch": 8.83, "learning_rate": 6.5041795811026586e-06, "loss": 0.6692, "step": 10445, "task_loss": 0.11766261607408524 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5160413384437561, "epoch": 8.83, "learning_rate": 6.499483422560346e-06, "loss": 0.441, "step": 10446, "task_loss": 0.30405688285827637 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5667554140090942, "epoch": 8.83, "learning_rate": 6.494787264018033e-06, "loss": 0.6421, "step": 10447, "task_loss": 1.0299257040023804 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6120223999023438, "epoch": 8.83, "learning_rate": 6.490091105475721e-06, "loss": 0.5278, "step": 10448, "task_loss": 0.4050830006599426 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5379377603530884, "epoch": 8.83, "learning_rate": 6.485394946933408e-06, "loss": 0.6638, "step": 10449, "task_loss": 0.7679821848869324 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4935733675956726, "epoch": 8.83, "learning_rate": 6.480698788391096e-06, "loss": 0.6012, "step": 10450, "task_loss": 0.7826787233352661 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.8047462701797485, "epoch": 8.83, "learning_rate": 6.476002629848785e-06, "loss": 0.8206, "step": 10451, "task_loss": 1.1417081356048584 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.45155438780784607, "epoch": 8.83, "learning_rate": 6.471306471306472e-06, "loss": 0.5956, "step": 10452, "task_loss": 0.12050943821668625 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4839274287223816, "epoch": 8.84, "learning_rate": 6.4666103127641594e-06, "loss": 0.5678, "step": 10453, "task_loss": 0.28474777936935425 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.9469181299209595, "epoch": 8.84, "learning_rate": 6.4619141542218475e-06, "loss": 0.5997, "step": 10454, "task_loss": 1.0008409023284912 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.0321787595748901, "epoch": 8.84, "learning_rate": 6.457217995679535e-06, "loss": 0.5809, "step": 10455, "task_loss": 0.8065201044082642 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4036603271961212, "epoch": 8.84, "learning_rate": 6.452521837137223e-06, "loss": 0.5835, "step": 10456, "task_loss": 1.1362075805664062 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.1988568305969238, "epoch": 8.84, "learning_rate": 6.44782567859491e-06, "loss": 0.7116, "step": 10457, "task_loss": 0.8144145011901855 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.36128729581832886, "epoch": 8.84, "learning_rate": 6.443129520052597e-06, "loss": 0.4231, "step": 10458, "task_loss": 1.609217643737793 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6983079314231873, "epoch": 8.84, "learning_rate": 6.438433361510285e-06, "loss": 0.508, "step": 10459, "task_loss": 0.6522270441055298 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.42222023010253906, "epoch": 8.84, "learning_rate": 6.433737202967972e-06, "loss": 0.5058, "step": 10460, "task_loss": 0.7344433665275574 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.46815043687820435, "epoch": 8.84, "learning_rate": 6.42904104442566e-06, "loss": 0.5615, "step": 10461, "task_loss": 0.7782984375953674 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.40455693006515503, "epoch": 8.84, "learning_rate": 6.4243448858833475e-06, "loss": 0.6212, "step": 10462, "task_loss": 0.916233241558075 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5114191770553589, "epoch": 8.84, "learning_rate": 6.419648727341035e-06, "loss": 0.6404, "step": 10463, "task_loss": 0.49829959869384766 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.39242225885391235, "epoch": 8.84, "learning_rate": 6.414952568798723e-06, "loss": 0.4748, "step": 10464, "task_loss": 0.5040316581726074 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.3969857096672058, "epoch": 8.85, "learning_rate": 6.41025641025641e-06, "loss": 0.4998, "step": 10465, "task_loss": 0.7113975882530212 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4179069995880127, "epoch": 8.85, "learning_rate": 6.405560251714097e-06, "loss": 0.5858, "step": 10466, "task_loss": 0.6395835280418396 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.24902011454105377, "epoch": 8.85, "learning_rate": 6.400864093171787e-06, "loss": 0.4231, "step": 10467, "task_loss": 1.0332974195480347 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6002901792526245, "epoch": 8.85, "learning_rate": 6.396167934629474e-06, "loss": 0.6387, "step": 10468, "task_loss": 0.9690895676612854 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.3422654867172241, "epoch": 8.85, "learning_rate": 6.391471776087161e-06, "loss": 0.5816, "step": 10469, "task_loss": 1.0971112251281738 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6650229692459106, "epoch": 8.85, "learning_rate": 6.386775617544849e-06, "loss": 0.7185, "step": 10470, "task_loss": 1.0368298292160034 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.26948630809783936, "epoch": 8.85, "learning_rate": 6.382079459002536e-06, "loss": 0.5034, "step": 10471, "task_loss": 0.6586220264434814 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.49230918288230896, "epoch": 8.85, "learning_rate": 6.3773833004602245e-06, "loss": 0.6279, "step": 10472, "task_loss": 1.0231226682662964 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4852588176727295, "epoch": 8.85, "learning_rate": 6.372687141917912e-06, "loss": 0.6573, "step": 10473, "task_loss": 0.7053407430648804 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5049886703491211, "epoch": 8.85, "learning_rate": 6.367990983375599e-06, "loss": 0.5053, "step": 10474, "task_loss": 0.38399216532707214 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.47920000553131104, "epoch": 8.85, "learning_rate": 6.363294824833287e-06, "loss": 0.501, "step": 10475, "task_loss": 0.24468664824962616 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4886174499988556, "epoch": 8.85, "learning_rate": 6.358598666290974e-06, "loss": 0.467, "step": 10476, "task_loss": 0.7694076895713806 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7213046550750732, "epoch": 8.86, "learning_rate": 6.353902507748661e-06, "loss": 0.5097, "step": 10477, "task_loss": 0.5757997035980225 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.3465002179145813, "epoch": 8.86, "learning_rate": 6.349206349206349e-06, "loss": 0.5685, "step": 10478, "task_loss": 0.28717365860939026 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.8193284869194031, "epoch": 8.86, "learning_rate": 6.3445101906640365e-06, "loss": 0.6364, "step": 10479, "task_loss": 0.3689892888069153 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4618656635284424, "epoch": 8.86, "learning_rate": 6.3398140321217245e-06, "loss": 0.4799, "step": 10480, "task_loss": 0.685082197189331 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6649463772773743, "epoch": 8.86, "learning_rate": 6.335117873579412e-06, "loss": 0.6973, "step": 10481, "task_loss": 1.2995537519454956 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6655111908912659, "epoch": 8.86, "learning_rate": 6.3304217150371006e-06, "loss": 0.7191, "step": 10482, "task_loss": 1.5514754056930542 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.872948408126831, "epoch": 8.86, "learning_rate": 6.325725556494788e-06, "loss": 0.6074, "step": 10483, "task_loss": 0.46104952692985535 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.47382640838623047, "epoch": 8.86, "learning_rate": 6.321029397952476e-06, "loss": 0.6258, "step": 10484, "task_loss": 0.1993219405412674 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6135778427124023, "epoch": 8.86, "learning_rate": 6.316333239410163e-06, "loss": 0.6431, "step": 10485, "task_loss": 1.2203813791275024 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.48063546419143677, "epoch": 8.86, "learning_rate": 6.311637080867851e-06, "loss": 0.5087, "step": 10486, "task_loss": 0.6722642183303833 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4375566840171814, "epoch": 8.86, "learning_rate": 6.306940922325538e-06, "loss": 0.4939, "step": 10487, "task_loss": 0.6924952268600464 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4275206923484802, "epoch": 8.87, "learning_rate": 6.302244763783225e-06, "loss": 0.6227, "step": 10488, "task_loss": 1.223399043083191 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4871463477611542, "epoch": 8.87, "learning_rate": 6.297548605240913e-06, "loss": 0.473, "step": 10489, "task_loss": 0.7852860689163208 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6707467436790466, "epoch": 8.87, "learning_rate": 6.292852446698601e-06, "loss": 0.661, "step": 10490, "task_loss": 0.4987085461616516 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.919183075428009, "epoch": 8.87, "learning_rate": 6.288156288156289e-06, "loss": 0.5969, "step": 10491, "task_loss": 0.7220089435577393 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.26325440406799316, "epoch": 8.87, "learning_rate": 6.283460129613976e-06, "loss": 0.5616, "step": 10492, "task_loss": 0.7322744727134705 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5973873138427734, "epoch": 8.87, "learning_rate": 6.278763971071663e-06, "loss": 0.6275, "step": 10493, "task_loss": 1.1629621982574463 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.48154014348983765, "epoch": 8.87, "learning_rate": 6.274067812529351e-06, "loss": 0.4906, "step": 10494, "task_loss": 0.6950964331626892 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7592697143554688, "epoch": 8.87, "learning_rate": 6.269371653987038e-06, "loss": 0.6165, "step": 10495, "task_loss": 1.1498048305511475 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6751308441162109, "epoch": 8.87, "learning_rate": 6.264675495444726e-06, "loss": 0.5845, "step": 10496, "task_loss": 0.6908773183822632 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5847740173339844, "epoch": 8.87, "learning_rate": 6.2599793369024134e-06, "loss": 0.5012, "step": 10497, "task_loss": 0.34573429822921753 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.47404396533966064, "epoch": 8.87, "learning_rate": 6.255283178360102e-06, "loss": 0.5892, "step": 10498, "task_loss": 0.39461416006088257 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4307149052619934, "epoch": 8.87, "learning_rate": 6.2505870198177895e-06, "loss": 0.5487, "step": 10499, "task_loss": 0.3059642016887665 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6312223672866821, "epoch": 8.88, "learning_rate": 6.245890861275477e-06, "loss": 0.5843, "step": 10500, "task_loss": 1.181584358215332 }, { "epoch": 8.88, "eval_accuracy": 0.9035247524752476, "eval_loss": 0.36775341629981995, "eval_runtime": 226.4418, "eval_samples_per_second": 111.508, "eval_steps_per_second": 0.874, "step": 10500 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6175811290740967, "epoch": 8.88, "learning_rate": 6.241194702733164e-06, "loss": 0.5708, "step": 10501, "task_loss": 0.7046071887016296 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4059455394744873, "epoch": 8.88, "learning_rate": 6.236498544190853e-06, "loss": 0.6378, "step": 10502, "task_loss": 0.7925266623497009 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7190342545509338, "epoch": 8.88, "learning_rate": 6.23180238564854e-06, "loss": 0.6777, "step": 10503, "task_loss": 0.825637936592102 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5120416879653931, "epoch": 8.88, "learning_rate": 6.227106227106227e-06, "loss": 0.5795, "step": 10504, "task_loss": 1.0261753797531128 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.3790232837200165, "epoch": 8.88, "learning_rate": 6.222410068563915e-06, "loss": 0.3805, "step": 10505, "task_loss": 0.5463254451751709 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.35531336069107056, "epoch": 8.88, "learning_rate": 6.217713910021602e-06, "loss": 0.4518, "step": 10506, "task_loss": 0.3739545941352844 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.49007144570350647, "epoch": 8.88, "learning_rate": 6.21301775147929e-06, "loss": 0.6577, "step": 10507, "task_loss": 0.5178561210632324 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7658932209014893, "epoch": 8.88, "learning_rate": 6.2083215929369776e-06, "loss": 0.4989, "step": 10508, "task_loss": 0.9057541489601135 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.41150379180908203, "epoch": 8.88, "learning_rate": 6.203625434394666e-06, "loss": 0.5817, "step": 10509, "task_loss": 0.5549957752227783 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5359537601470947, "epoch": 8.88, "learning_rate": 6.198929275852354e-06, "loss": 0.4818, "step": 10510, "task_loss": 0.24790579080581665 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6541622877120972, "epoch": 8.88, "learning_rate": 6.194233117310041e-06, "loss": 0.5468, "step": 10511, "task_loss": 0.73049396276474 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.3074570298194885, "epoch": 8.89, "learning_rate": 6.189536958767728e-06, "loss": 0.5419, "step": 10512, "task_loss": 0.38451525568962097 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4428836703300476, "epoch": 8.89, "learning_rate": 6.184840800225416e-06, "loss": 0.5495, "step": 10513, "task_loss": 1.2117732763290405 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.37948668003082275, "epoch": 8.89, "learning_rate": 6.180144641683103e-06, "loss": 0.3615, "step": 10514, "task_loss": 0.5162052512168884 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7426187992095947, "epoch": 8.89, "learning_rate": 6.175448483140791e-06, "loss": 0.6237, "step": 10515, "task_loss": 1.619215488433838 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.9728736281394958, "epoch": 8.89, "learning_rate": 6.1707523245984785e-06, "loss": 0.7038, "step": 10516, "task_loss": 0.3186282217502594 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7929612398147583, "epoch": 8.89, "learning_rate": 6.1660561660561665e-06, "loss": 0.6147, "step": 10517, "task_loss": 0.8639477491378784 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5141814351081848, "epoch": 8.89, "learning_rate": 6.1613600075138545e-06, "loss": 0.5702, "step": 10518, "task_loss": 0.739983081817627 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4008757174015045, "epoch": 8.89, "learning_rate": 6.156663848971542e-06, "loss": 0.6664, "step": 10519, "task_loss": 0.39638882875442505 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7872204184532166, "epoch": 8.89, "learning_rate": 6.151967690429229e-06, "loss": 0.6088, "step": 10520, "task_loss": 0.9730578660964966 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.48557716608047485, "epoch": 8.89, "learning_rate": 6.147271531886917e-06, "loss": 0.5429, "step": 10521, "task_loss": 0.5730066299438477 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.065480351448059, "epoch": 8.89, "learning_rate": 6.142575373344604e-06, "loss": 0.6306, "step": 10522, "task_loss": 1.4081183671951294 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.2601175606250763, "epoch": 8.89, "learning_rate": 6.137879214802292e-06, "loss": 0.4472, "step": 10523, "task_loss": 0.035313066095113754 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4732591509819031, "epoch": 8.9, "learning_rate": 6.133183056259979e-06, "loss": 0.5128, "step": 10524, "task_loss": 0.22989100217819214 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5309720039367676, "epoch": 8.9, "learning_rate": 6.128486897717667e-06, "loss": 0.6482, "step": 10525, "task_loss": 0.4045575261116028 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.25010979175567627, "epoch": 8.9, "learning_rate": 6.123790739175355e-06, "loss": 0.4692, "step": 10526, "task_loss": 0.7692723870277405 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.2988647520542145, "epoch": 8.9, "learning_rate": 6.119094580633043e-06, "loss": 0.4172, "step": 10527, "task_loss": 0.2577003240585327 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.34933677315711975, "epoch": 8.9, "learning_rate": 6.11439842209073e-06, "loss": 0.4424, "step": 10528, "task_loss": 0.5399209856987 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5226921439170837, "epoch": 8.9, "learning_rate": 6.109702263548418e-06, "loss": 0.6551, "step": 10529, "task_loss": 0.8803897500038147 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.012583613395691, "epoch": 8.9, "learning_rate": 6.105006105006105e-06, "loss": 0.7396, "step": 10530, "task_loss": 2.12973690032959 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6580854654312134, "epoch": 8.9, "learning_rate": 6.100309946463793e-06, "loss": 0.5825, "step": 10531, "task_loss": 1.1332801580429077 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.42751938104629517, "epoch": 8.9, "learning_rate": 6.095613787921481e-06, "loss": 0.4451, "step": 10532, "task_loss": 1.241970181465149 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.9219502806663513, "epoch": 8.9, "learning_rate": 6.090917629379168e-06, "loss": 0.7883, "step": 10533, "task_loss": 1.1312386989593506 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5064666271209717, "epoch": 8.9, "learning_rate": 6.086221470836856e-06, "loss": 0.5312, "step": 10534, "task_loss": 1.2637144327163696 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.0976104736328125, "epoch": 8.9, "learning_rate": 6.0815253122945435e-06, "loss": 0.6865, "step": 10535, "task_loss": 1.5269243717193604 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7505024671554565, "epoch": 8.91, "learning_rate": 6.076829153752231e-06, "loss": 0.5784, "step": 10536, "task_loss": 0.668350100517273 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.3253743350505829, "epoch": 8.91, "learning_rate": 6.072132995209919e-06, "loss": 0.4478, "step": 10537, "task_loss": 0.4265812039375305 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.39171114563941956, "epoch": 8.91, "learning_rate": 6.067436836667606e-06, "loss": 0.5411, "step": 10538, "task_loss": 1.0479719638824463 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5179042220115662, "epoch": 8.91, "learning_rate": 6.062740678125294e-06, "loss": 0.6212, "step": 10539, "task_loss": 1.1634745597839355 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5120424628257751, "epoch": 8.91, "learning_rate": 6.058044519582982e-06, "loss": 0.453, "step": 10540, "task_loss": 1.7708070278167725 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7572463154792786, "epoch": 8.91, "learning_rate": 6.053348361040669e-06, "loss": 0.592, "step": 10541, "task_loss": 0.7134084105491638 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.49136799573898315, "epoch": 8.91, "learning_rate": 6.048652202498357e-06, "loss": 0.5125, "step": 10542, "task_loss": 0.4384304881095886 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5600651502609253, "epoch": 8.91, "learning_rate": 6.043956043956044e-06, "loss": 0.5287, "step": 10543, "task_loss": 0.4968068301677704 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4539504647254944, "epoch": 8.91, "learning_rate": 6.0392598854137315e-06, "loss": 0.6413, "step": 10544, "task_loss": 0.7371556758880615 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7250306606292725, "epoch": 8.91, "learning_rate": 6.03456372687142e-06, "loss": 0.4634, "step": 10545, "task_loss": 1.3915570974349976 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.36432817578315735, "epoch": 8.91, "learning_rate": 6.029867568329107e-06, "loss": 0.5151, "step": 10546, "task_loss": 0.3992324471473694 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.3606415390968323, "epoch": 8.91, "learning_rate": 6.025171409786794e-06, "loss": 0.4366, "step": 10547, "task_loss": 0.8253019452095032 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.680189847946167, "epoch": 8.92, "learning_rate": 6.020475251244483e-06, "loss": 0.6128, "step": 10548, "task_loss": 1.7388219833374023 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7580942511558533, "epoch": 8.92, "learning_rate": 6.01577909270217e-06, "loss": 0.5784, "step": 10549, "task_loss": 0.4446659982204437 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7265965342521667, "epoch": 8.92, "learning_rate": 6.011082934159857e-06, "loss": 0.6846, "step": 10550, "task_loss": 0.7343148589134216 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7248297333717346, "epoch": 8.92, "learning_rate": 6.006386775617545e-06, "loss": 0.6278, "step": 10551, "task_loss": 0.8422881364822388 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4567148983478546, "epoch": 8.92, "learning_rate": 6.0016906170752324e-06, "loss": 0.5052, "step": 10552, "task_loss": 0.34851792454719543 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6441633105278015, "epoch": 8.92, "learning_rate": 5.9969944585329205e-06, "loss": 0.8101, "step": 10553, "task_loss": 0.43911269307136536 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4971086084842682, "epoch": 8.92, "learning_rate": 5.992298299990608e-06, "loss": 0.5359, "step": 10554, "task_loss": 0.5593665838241577 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6054649353027344, "epoch": 8.92, "learning_rate": 5.987602141448296e-06, "loss": 0.5545, "step": 10555, "task_loss": 0.5217118859291077 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.3951791524887085, "epoch": 8.92, "learning_rate": 5.982905982905984e-06, "loss": 0.5078, "step": 10556, "task_loss": 0.9322754740715027 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4569542407989502, "epoch": 8.92, "learning_rate": 5.978209824363671e-06, "loss": 0.6605, "step": 10557, "task_loss": 1.0454529523849487 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4214921295642853, "epoch": 8.92, "learning_rate": 5.973513665821358e-06, "loss": 0.4589, "step": 10558, "task_loss": 0.8318082094192505 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.8459569215774536, "epoch": 8.93, "learning_rate": 5.968817507279046e-06, "loss": 0.6993, "step": 10559, "task_loss": 0.8212286233901978 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5788612365722656, "epoch": 8.93, "learning_rate": 5.964121348736733e-06, "loss": 0.5711, "step": 10560, "task_loss": 1.0399996042251587 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.0869815349578857, "epoch": 8.93, "learning_rate": 5.959425190194421e-06, "loss": 0.6249, "step": 10561, "task_loss": 1.0399342775344849 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.8058860301971436, "epoch": 8.93, "learning_rate": 5.9547290316521085e-06, "loss": 0.7149, "step": 10562, "task_loss": 0.8399339914321899 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.3476080894470215, "epoch": 8.93, "learning_rate": 5.9500328731097966e-06, "loss": 0.4805, "step": 10563, "task_loss": 0.6809529066085815 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6241342425346375, "epoch": 8.93, "learning_rate": 5.945336714567485e-06, "loss": 0.4459, "step": 10564, "task_loss": 1.3063066005706787 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.900982141494751, "epoch": 8.93, "learning_rate": 5.940640556025172e-06, "loss": 0.598, "step": 10565, "task_loss": 0.7863163352012634 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4424629211425781, "epoch": 8.93, "learning_rate": 5.935944397482859e-06, "loss": 0.7145, "step": 10566, "task_loss": 0.8460557460784912 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.538854718208313, "epoch": 8.93, "learning_rate": 5.931248238940547e-06, "loss": 0.6042, "step": 10567, "task_loss": 0.26784974336624146 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.9073569178581238, "epoch": 8.93, "learning_rate": 5.926552080398234e-06, "loss": 0.5776, "step": 10568, "task_loss": 0.6299015879631042 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.8600467443466187, "epoch": 8.93, "learning_rate": 5.921855921855922e-06, "loss": 0.5787, "step": 10569, "task_loss": 0.7108116149902344 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7636242508888245, "epoch": 8.93, "learning_rate": 5.917159763313609e-06, "loss": 0.6909, "step": 10570, "task_loss": 0.3891683518886566 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4737194776535034, "epoch": 8.94, "learning_rate": 5.9124636047712974e-06, "loss": 0.4325, "step": 10571, "task_loss": 1.2649235725402832 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5232113599777222, "epoch": 8.94, "learning_rate": 5.9077674462289855e-06, "loss": 0.5092, "step": 10572, "task_loss": 0.3710166811943054 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6406015157699585, "epoch": 8.94, "learning_rate": 5.903071287686673e-06, "loss": 0.582, "step": 10573, "task_loss": 1.0622791051864624 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.28076452016830444, "epoch": 8.94, "learning_rate": 5.89837512914436e-06, "loss": 0.4897, "step": 10574, "task_loss": 0.22033900022506714 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.73424232006073, "epoch": 8.94, "learning_rate": 5.893678970602048e-06, "loss": 0.4436, "step": 10575, "task_loss": 0.33071139454841614 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4246208071708679, "epoch": 8.94, "learning_rate": 5.888982812059735e-06, "loss": 0.5423, "step": 10576, "task_loss": 0.22242748737335205 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.38111612200737, "epoch": 8.94, "learning_rate": 5.884286653517423e-06, "loss": 0.6167, "step": 10577, "task_loss": 0.30698180198669434 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5411278009414673, "epoch": 8.94, "learning_rate": 5.87959049497511e-06, "loss": 0.7413, "step": 10578, "task_loss": 0.8103427886962891 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.43713247776031494, "epoch": 8.94, "learning_rate": 5.874894336432798e-06, "loss": 0.7195, "step": 10579, "task_loss": 0.9134210348129272 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.3005964457988739, "epoch": 8.94, "learning_rate": 5.870198177890486e-06, "loss": 0.451, "step": 10580, "task_loss": 0.2694101631641388 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.49510639905929565, "epoch": 8.94, "learning_rate": 5.8655020193481735e-06, "loss": 0.6588, "step": 10581, "task_loss": 0.6259457468986511 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.403123140335083, "epoch": 8.94, "learning_rate": 5.860805860805861e-06, "loss": 0.4654, "step": 10582, "task_loss": 0.267294317483902 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.542702317237854, "epoch": 8.95, "learning_rate": 5.856109702263549e-06, "loss": 0.6111, "step": 10583, "task_loss": 0.11389429867267609 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.36455023288726807, "epoch": 8.95, "learning_rate": 5.851413543721236e-06, "loss": 0.4608, "step": 10584, "task_loss": 0.12865935266017914 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4708772897720337, "epoch": 8.95, "learning_rate": 5.846717385178924e-06, "loss": 0.4848, "step": 10585, "task_loss": 0.24009481072425842 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7306826114654541, "epoch": 8.95, "learning_rate": 5.842021226636612e-06, "loss": 0.5548, "step": 10586, "task_loss": 0.9158787727355957 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7236275672912598, "epoch": 8.95, "learning_rate": 5.837325068094299e-06, "loss": 0.5932, "step": 10587, "task_loss": 1.0569521188735962 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7704841494560242, "epoch": 8.95, "learning_rate": 5.832628909551987e-06, "loss": 0.4546, "step": 10588, "task_loss": 0.8241244554519653 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5749755501747131, "epoch": 8.95, "learning_rate": 5.8279327510096744e-06, "loss": 0.5516, "step": 10589, "task_loss": 0.8147531747817993 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.48853567242622375, "epoch": 8.95, "learning_rate": 5.823236592467362e-06, "loss": 0.6355, "step": 10590, "task_loss": 0.29662421345710754 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.3158225417137146, "epoch": 8.95, "learning_rate": 5.81854043392505e-06, "loss": 0.4613, "step": 10591, "task_loss": 0.4538353681564331 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.8548911809921265, "epoch": 8.95, "learning_rate": 5.813844275382737e-06, "loss": 0.7016, "step": 10592, "task_loss": 1.8272366523742676 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.3361542820930481, "epoch": 8.95, "learning_rate": 5.809148116840425e-06, "loss": 0.4179, "step": 10593, "task_loss": 0.23179684579372406 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.2522212266921997, "epoch": 8.95, "learning_rate": 5.804451958298113e-06, "loss": 0.6995, "step": 10594, "task_loss": 1.558007001876831 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7357101440429688, "epoch": 8.96, "learning_rate": 5.7997557997558e-06, "loss": 0.5646, "step": 10595, "task_loss": 1.6892472505569458 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.2751644253730774, "epoch": 8.96, "learning_rate": 5.795059641213488e-06, "loss": 0.4064, "step": 10596, "task_loss": 0.4793139696121216 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4209744334220886, "epoch": 8.96, "learning_rate": 5.790363482671175e-06, "loss": 0.5497, "step": 10597, "task_loss": 0.5476157069206238 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.344992995262146, "epoch": 8.96, "learning_rate": 5.7856673241288625e-06, "loss": 0.4452, "step": 10598, "task_loss": 0.5873465538024902 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.27394407987594604, "epoch": 8.96, "learning_rate": 5.7809711655865505e-06, "loss": 0.4575, "step": 10599, "task_loss": 0.5968269109725952 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.521414041519165, "epoch": 8.96, "learning_rate": 5.776275007044238e-06, "loss": 0.5233, "step": 10600, "task_loss": 0.5214977264404297 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.617308497428894, "epoch": 8.96, "learning_rate": 5.771578848501926e-06, "loss": 0.6645, "step": 10601, "task_loss": 0.45544153451919556 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7717319130897522, "epoch": 8.96, "learning_rate": 5.766882689959614e-06, "loss": 0.5364, "step": 10602, "task_loss": 1.0179121494293213 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.914208710193634, "epoch": 8.96, "learning_rate": 5.762186531417301e-06, "loss": 0.6355, "step": 10603, "task_loss": 0.8773550987243652 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5308414697647095, "epoch": 8.96, "learning_rate": 5.757490372874989e-06, "loss": 0.6215, "step": 10604, "task_loss": 0.6074046492576599 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5941941738128662, "epoch": 8.96, "learning_rate": 5.752794214332676e-06, "loss": 0.5265, "step": 10605, "task_loss": 1.334627389907837 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6161577105522156, "epoch": 8.96, "learning_rate": 5.748098055790363e-06, "loss": 0.7118, "step": 10606, "task_loss": 1.05649995803833 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.42733895778656006, "epoch": 8.97, "learning_rate": 5.743401897248051e-06, "loss": 0.673, "step": 10607, "task_loss": 0.5426110029220581 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.47834107279777527, "epoch": 8.97, "learning_rate": 5.738705738705739e-06, "loss": 0.6304, "step": 10608, "task_loss": 0.6959487199783325 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.2916804552078247, "epoch": 8.97, "learning_rate": 5.734009580163427e-06, "loss": 0.4511, "step": 10609, "task_loss": 0.5602156519889832 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6137771606445312, "epoch": 8.97, "learning_rate": 5.729313421621115e-06, "loss": 0.6402, "step": 10610, "task_loss": 0.2187805473804474 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4819800555706024, "epoch": 8.97, "learning_rate": 5.724617263078802e-06, "loss": 0.4629, "step": 10611, "task_loss": 1.5057392120361328 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5611621737480164, "epoch": 8.97, "learning_rate": 5.71992110453649e-06, "loss": 0.5181, "step": 10612, "task_loss": 0.620520293712616 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.3776792287826538, "epoch": 8.97, "learning_rate": 5.715224945994177e-06, "loss": 0.5578, "step": 10613, "task_loss": 0.8055549263954163 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4176749289035797, "epoch": 8.97, "learning_rate": 5.710528787451864e-06, "loss": 0.4776, "step": 10614, "task_loss": 0.5071150064468384 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7397521734237671, "epoch": 8.97, "learning_rate": 5.705832628909552e-06, "loss": 0.5064, "step": 10615, "task_loss": 0.9772310256958008 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6495307683944702, "epoch": 8.97, "learning_rate": 5.7011364703672395e-06, "loss": 0.6405, "step": 10616, "task_loss": 0.592419445514679 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7125726342201233, "epoch": 8.97, "learning_rate": 5.6964403118249275e-06, "loss": 0.5695, "step": 10617, "task_loss": 0.2861241102218628 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5696787238121033, "epoch": 8.97, "learning_rate": 5.6917441532826155e-06, "loss": 0.4747, "step": 10618, "task_loss": 0.8371036052703857 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7022693753242493, "epoch": 8.98, "learning_rate": 5.687047994740303e-06, "loss": 0.6533, "step": 10619, "task_loss": 0.2178897261619568 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.773979663848877, "epoch": 8.98, "learning_rate": 5.682351836197991e-06, "loss": 0.5448, "step": 10620, "task_loss": 0.45013806223869324 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4895588457584381, "epoch": 8.98, "learning_rate": 5.677655677655678e-06, "loss": 0.719, "step": 10621, "task_loss": 0.5551857948303223 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6219139099121094, "epoch": 8.98, "learning_rate": 5.672959519113365e-06, "loss": 0.5339, "step": 10622, "task_loss": 0.7996835112571716 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4924367070198059, "epoch": 8.98, "learning_rate": 5.668263360571053e-06, "loss": 0.5726, "step": 10623, "task_loss": 0.12421528995037079 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6398635506629944, "epoch": 8.98, "learning_rate": 5.66356720202874e-06, "loss": 0.6373, "step": 10624, "task_loss": 0.9792771339416504 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.43375566601753235, "epoch": 8.98, "learning_rate": 5.658871043486428e-06, "loss": 0.4617, "step": 10625, "task_loss": 0.6341457962989807 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.39524757862091064, "epoch": 8.98, "learning_rate": 5.6541748849441164e-06, "loss": 0.4117, "step": 10626, "task_loss": 0.36518457531929016 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.38290005922317505, "epoch": 8.98, "learning_rate": 5.649478726401804e-06, "loss": 0.5015, "step": 10627, "task_loss": 0.21354712545871735 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.42722660303115845, "epoch": 8.98, "learning_rate": 5.644782567859491e-06, "loss": 0.4955, "step": 10628, "task_loss": 0.7511085867881775 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5205132365226746, "epoch": 8.98, "learning_rate": 5.640086409317179e-06, "loss": 0.4821, "step": 10629, "task_loss": 0.475449800491333 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.37586677074432373, "epoch": 8.99, "learning_rate": 5.635390250774866e-06, "loss": 0.6439, "step": 10630, "task_loss": 0.8518162965774536 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7536553144454956, "epoch": 8.99, "learning_rate": 5.630694092232554e-06, "loss": 0.63, "step": 10631, "task_loss": 0.44915422797203064 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.757983922958374, "epoch": 8.99, "learning_rate": 5.625997933690241e-06, "loss": 0.6434, "step": 10632, "task_loss": 0.7227051258087158 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.46571505069732666, "epoch": 8.99, "learning_rate": 5.621301775147929e-06, "loss": 0.4219, "step": 10633, "task_loss": 0.6484419107437134 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.45685186982154846, "epoch": 8.99, "learning_rate": 5.616605616605617e-06, "loss": 0.4767, "step": 10634, "task_loss": 0.4895040988922119 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.23881344497203827, "epoch": 8.99, "learning_rate": 5.6119094580633045e-06, "loss": 0.5771, "step": 10635, "task_loss": 0.20051783323287964 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5274030566215515, "epoch": 8.99, "learning_rate": 5.607213299520992e-06, "loss": 0.5647, "step": 10636, "task_loss": 0.18868543207645416 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5698296427726746, "epoch": 8.99, "learning_rate": 5.60251714097868e-06, "loss": 0.5355, "step": 10637, "task_loss": 0.436833918094635 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5624359250068665, "epoch": 8.99, "learning_rate": 5.597820982436367e-06, "loss": 0.5512, "step": 10638, "task_loss": 1.210066795349121 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6097291111946106, "epoch": 8.99, "learning_rate": 5.593124823894055e-06, "loss": 0.5343, "step": 10639, "task_loss": 1.423251986503601 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5431709289550781, "epoch": 8.99, "learning_rate": 5.588428665351743e-06, "loss": 0.6186, "step": 10640, "task_loss": 0.6638689637184143 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.37089869379997253, "epoch": 8.99, "learning_rate": 5.58373250680943e-06, "loss": 0.4626, "step": 10641, "task_loss": 1.4731700420379639 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5372847318649292, "epoch": 9.0, "learning_rate": 5.579036348267118e-06, "loss": 0.5969, "step": 10642, "task_loss": 0.14681988954544067 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5942049026489258, "epoch": 9.0, "learning_rate": 5.574340189724805e-06, "loss": 0.5654, "step": 10643, "task_loss": 0.41353386640548706 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4840534031391144, "epoch": 9.0, "learning_rate": 5.5696440311824926e-06, "loss": 0.5238, "step": 10644, "task_loss": 0.6436223387718201 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.0348104238510132, "epoch": 9.0, "learning_rate": 5.564947872640181e-06, "loss": 0.5447, "step": 10645, "task_loss": 0.5985457897186279 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.49458739161491394, "epoch": 9.0, "learning_rate": 5.560251714097868e-06, "loss": 0.5415, "step": 10646, "task_loss": 0.5283011794090271 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.3433035612106323, "epoch": 9.0, "learning_rate": 5.555555555555556e-06, "loss": 0.482, "step": 10647, "task_loss": 0.7570391893386841 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.407757967710495, "epoch": 9.0, "learning_rate": 5.550859397013244e-06, "loss": 0.8095, "step": 10648, "task_loss": 0.23411524295806885 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5599245429039001, "epoch": 9.0, "learning_rate": 5.546163238470931e-06, "loss": 0.5671, "step": 10649, "task_loss": 0.6180043816566467 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.8195317983627319, "epoch": 9.0, "learning_rate": 5.541467079928619e-06, "loss": 0.5945, "step": 10650, "task_loss": 0.719485342502594 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.35114070773124695, "epoch": 9.0, "learning_rate": 5.536770921386306e-06, "loss": 0.5078, "step": 10651, "task_loss": 0.8089998364448547 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5093770623207092, "epoch": 9.0, "learning_rate": 5.5320747628439934e-06, "loss": 0.5048, "step": 10652, "task_loss": 0.2606903612613678 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.34760093688964844, "epoch": 9.01, "learning_rate": 5.5273786043016815e-06, "loss": 0.45, "step": 10653, "task_loss": 0.07798047363758087 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.34830141067504883, "epoch": 9.01, "learning_rate": 5.522682445759369e-06, "loss": 0.6918, "step": 10654, "task_loss": 0.45146510004997253 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.22173309326171875, "epoch": 9.01, "learning_rate": 5.517986287217057e-06, "loss": 0.3804, "step": 10655, "task_loss": 0.036402247846126556 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.39955711364746094, "epoch": 9.01, "learning_rate": 5.513290128674745e-06, "loss": 0.6095, "step": 10656, "task_loss": 0.3125511109828949 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.9853004813194275, "epoch": 9.01, "learning_rate": 5.508593970132432e-06, "loss": 0.7771, "step": 10657, "task_loss": 1.1309833526611328 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5019111633300781, "epoch": 9.01, "learning_rate": 5.50389781159012e-06, "loss": 0.4234, "step": 10658, "task_loss": 0.544341504573822 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5345777869224548, "epoch": 9.01, "learning_rate": 5.499201653047807e-06, "loss": 0.5498, "step": 10659, "task_loss": 0.8023315072059631 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.44157788157463074, "epoch": 9.01, "learning_rate": 5.494505494505494e-06, "loss": 0.4246, "step": 10660, "task_loss": 1.2218419313430786 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6741924285888672, "epoch": 9.01, "learning_rate": 5.489809335963182e-06, "loss": 0.5145, "step": 10661, "task_loss": 1.5363187789916992 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5180450677871704, "epoch": 9.01, "learning_rate": 5.4851131774208696e-06, "loss": 0.5938, "step": 10662, "task_loss": 0.9459388852119446 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7340129017829895, "epoch": 9.01, "learning_rate": 5.480417018878558e-06, "loss": 0.5409, "step": 10663, "task_loss": 0.998272180557251 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.38078004121780396, "epoch": 9.01, "learning_rate": 5.475720860336246e-06, "loss": 0.6563, "step": 10664, "task_loss": 0.15468423068523407 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.441752552986145, "epoch": 9.02, "learning_rate": 5.471024701793933e-06, "loss": 0.5418, "step": 10665, "task_loss": 1.0044411420822144 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5145384073257446, "epoch": 9.02, "learning_rate": 5.466328543251621e-06, "loss": 0.5232, "step": 10666, "task_loss": 0.5512675046920776 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5974806547164917, "epoch": 9.02, "learning_rate": 5.461632384709308e-06, "loss": 0.4971, "step": 10667, "task_loss": 1.0341695547103882 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5553049445152283, "epoch": 9.02, "learning_rate": 5.456936226166995e-06, "loss": 0.5026, "step": 10668, "task_loss": 0.6006874442100525 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5564650893211365, "epoch": 9.02, "learning_rate": 5.452240067624683e-06, "loss": 0.6138, "step": 10669, "task_loss": 0.7951217889785767 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7239872217178345, "epoch": 9.02, "learning_rate": 5.4475439090823704e-06, "loss": 0.5086, "step": 10670, "task_loss": 0.6583983302116394 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4491655230522156, "epoch": 9.02, "learning_rate": 5.4428477505400585e-06, "loss": 0.5441, "step": 10671, "task_loss": 0.5809100866317749 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.528498113155365, "epoch": 9.02, "learning_rate": 5.4381515919977465e-06, "loss": 0.4721, "step": 10672, "task_loss": 1.106186866760254 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4860708713531494, "epoch": 9.02, "learning_rate": 5.433455433455434e-06, "loss": 0.659, "step": 10673, "task_loss": 0.3552553057670593 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7622365951538086, "epoch": 9.02, "learning_rate": 5.428759274913122e-06, "loss": 0.5915, "step": 10674, "task_loss": 1.056890845298767 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.47468799352645874, "epoch": 9.02, "learning_rate": 5.424063116370809e-06, "loss": 0.5797, "step": 10675, "task_loss": 0.1666695773601532 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.36272135376930237, "epoch": 9.02, "learning_rate": 5.419366957828496e-06, "loss": 0.3778, "step": 10676, "task_loss": 0.4107912480831146 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.2222643345594406, "epoch": 9.03, "learning_rate": 5.414670799286184e-06, "loss": 0.3718, "step": 10677, "task_loss": 0.06729756295681 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.8407806158065796, "epoch": 9.03, "learning_rate": 5.409974640743871e-06, "loss": 0.5842, "step": 10678, "task_loss": 0.739843487739563 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5900310277938843, "epoch": 9.03, "learning_rate": 5.405278482201559e-06, "loss": 0.4986, "step": 10679, "task_loss": 0.6322044134140015 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.48569199442863464, "epoch": 9.03, "learning_rate": 5.400582323659247e-06, "loss": 0.4689, "step": 10680, "task_loss": 0.31120049953460693 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.32002347707748413, "epoch": 9.03, "learning_rate": 5.3958861651169346e-06, "loss": 0.5822, "step": 10681, "task_loss": 0.77793288230896 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.776505708694458, "epoch": 9.03, "learning_rate": 5.391190006574623e-06, "loss": 0.7033, "step": 10682, "task_loss": 1.2588181495666504 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5574590563774109, "epoch": 9.03, "learning_rate": 5.38649384803231e-06, "loss": 0.4978, "step": 10683, "task_loss": 0.3758609890937805 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5000337958335876, "epoch": 9.03, "learning_rate": 5.381797689489997e-06, "loss": 0.5536, "step": 10684, "task_loss": 0.615101158618927 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.3912767767906189, "epoch": 9.03, "learning_rate": 5.377101530947685e-06, "loss": 0.377, "step": 10685, "task_loss": 0.5905370712280273 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6533083319664001, "epoch": 9.03, "learning_rate": 5.372405372405373e-06, "loss": 0.5492, "step": 10686, "task_loss": 1.3165231943130493 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4646679162979126, "epoch": 9.03, "learning_rate": 5.36770921386306e-06, "loss": 0.6037, "step": 10687, "task_loss": 0.6696792840957642 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4812178611755371, "epoch": 9.03, "learning_rate": 5.363013055320748e-06, "loss": 0.5397, "step": 10688, "task_loss": 0.9114646315574646 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.35127007961273193, "epoch": 9.04, "learning_rate": 5.3583168967784355e-06, "loss": 0.4817, "step": 10689, "task_loss": 0.2912335991859436 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4510999321937561, "epoch": 9.04, "learning_rate": 5.3536207382361235e-06, "loss": 0.5138, "step": 10690, "task_loss": 1.0328394174575806 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7420287132263184, "epoch": 9.04, "learning_rate": 5.348924579693811e-06, "loss": 0.6592, "step": 10691, "task_loss": 1.5977141857147217 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.584267258644104, "epoch": 9.04, "learning_rate": 5.344228421151498e-06, "loss": 0.501, "step": 10692, "task_loss": 0.8599151372909546 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4502565562725067, "epoch": 9.04, "learning_rate": 5.339532262609186e-06, "loss": 0.574, "step": 10693, "task_loss": 1.053470253944397 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.34601396322250366, "epoch": 9.04, "learning_rate": 5.334836104066874e-06, "loss": 0.4976, "step": 10694, "task_loss": 0.870319128036499 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.30170923471450806, "epoch": 9.04, "learning_rate": 5.330139945524561e-06, "loss": 0.4996, "step": 10695, "task_loss": 1.048630714416504 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.32882335782051086, "epoch": 9.04, "learning_rate": 5.325443786982249e-06, "loss": 0.5102, "step": 10696, "task_loss": 0.5748003721237183 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.2617471218109131, "epoch": 9.04, "learning_rate": 5.320747628439936e-06, "loss": 0.5094, "step": 10697, "task_loss": 0.1973281353712082 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7230280637741089, "epoch": 9.04, "learning_rate": 5.316051469897624e-06, "loss": 0.636, "step": 10698, "task_loss": 1.0312447547912598 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.499292254447937, "epoch": 9.04, "learning_rate": 5.3113553113553116e-06, "loss": 0.4413, "step": 10699, "task_loss": 0.5267291069030762 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7206746935844421, "epoch": 9.04, "learning_rate": 5.306659152812999e-06, "loss": 0.5219, "step": 10700, "task_loss": 0.582303524017334 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.18800894916057587, "epoch": 9.05, "learning_rate": 5.301962994270687e-06, "loss": 0.3929, "step": 10701, "task_loss": 0.00767991878092289 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6911872625350952, "epoch": 9.05, "learning_rate": 5.297266835728375e-06, "loss": 0.6601, "step": 10702, "task_loss": 0.403439998626709 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6180187463760376, "epoch": 9.05, "learning_rate": 5.292570677186062e-06, "loss": 0.4926, "step": 10703, "task_loss": 0.4618394076824188 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.8982678055763245, "epoch": 9.05, "learning_rate": 5.28787451864375e-06, "loss": 0.7141, "step": 10704, "task_loss": 1.3920810222625732 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.42901918292045593, "epoch": 9.05, "learning_rate": 5.283178360101437e-06, "loss": 0.4633, "step": 10705, "task_loss": 0.6130470037460327 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5027579069137573, "epoch": 9.05, "learning_rate": 5.278482201559124e-06, "loss": 0.5714, "step": 10706, "task_loss": 0.5893825888633728 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5195333957672119, "epoch": 9.05, "learning_rate": 5.2737860430168124e-06, "loss": 0.657, "step": 10707, "task_loss": 1.0011210441589355 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.3038981556892395, "epoch": 9.05, "learning_rate": 5.2690898844745e-06, "loss": 0.4822, "step": 10708, "task_loss": 1.102056860923767 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4899473190307617, "epoch": 9.05, "learning_rate": 5.264393725932188e-06, "loss": 0.489, "step": 10709, "task_loss": 0.2596272826194763 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5682339668273926, "epoch": 9.05, "learning_rate": 5.259697567389876e-06, "loss": 0.6632, "step": 10710, "task_loss": 0.5928970575332642 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5781933069229126, "epoch": 9.05, "learning_rate": 5.255001408847563e-06, "loss": 0.5011, "step": 10711, "task_loss": 0.5947277545928955 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.524718701839447, "epoch": 9.05, "learning_rate": 5.250305250305251e-06, "loss": 0.5594, "step": 10712, "task_loss": 0.6361549496650696 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5746286511421204, "epoch": 9.06, "learning_rate": 5.245609091762938e-06, "loss": 0.5644, "step": 10713, "task_loss": 0.21720650792121887 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4651991128921509, "epoch": 9.06, "learning_rate": 5.240912933220625e-06, "loss": 0.5575, "step": 10714, "task_loss": 0.9711751937866211 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.8578423261642456, "epoch": 9.06, "learning_rate": 5.236216774678313e-06, "loss": 0.5909, "step": 10715, "task_loss": 1.534393310546875 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.23297281563282013, "epoch": 9.06, "learning_rate": 5.2315206161360005e-06, "loss": 0.445, "step": 10716, "task_loss": 0.3661234974861145 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.48204970359802246, "epoch": 9.06, "learning_rate": 5.2268244575936885e-06, "loss": 0.6647, "step": 10717, "task_loss": 0.7805172801017761 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4789668023586273, "epoch": 9.06, "learning_rate": 5.2221282990513766e-06, "loss": 0.5711, "step": 10718, "task_loss": 0.20756149291992188 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.9267908334732056, "epoch": 9.06, "learning_rate": 5.217432140509064e-06, "loss": 0.864, "step": 10719, "task_loss": 0.9171956777572632 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6839046478271484, "epoch": 9.06, "learning_rate": 5.212735981966752e-06, "loss": 0.4646, "step": 10720, "task_loss": 0.512065589427948 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.3602970242500305, "epoch": 9.06, "learning_rate": 5.208039823424439e-06, "loss": 0.5157, "step": 10721, "task_loss": 0.511012852191925 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.3568427562713623, "epoch": 9.06, "learning_rate": 5.203343664882126e-06, "loss": 0.4484, "step": 10722, "task_loss": 0.04860250651836395 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7669222354888916, "epoch": 9.06, "learning_rate": 5.198647506339814e-06, "loss": 0.5706, "step": 10723, "task_loss": 0.23311369121074677 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.30617988109588623, "epoch": 9.07, "learning_rate": 5.193951347797501e-06, "loss": 0.6441, "step": 10724, "task_loss": 0.2729555666446686 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.47705668210983276, "epoch": 9.07, "learning_rate": 5.189255189255189e-06, "loss": 0.5855, "step": 10725, "task_loss": 0.9603689312934875 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4342404901981354, "epoch": 9.07, "learning_rate": 5.1845590307128775e-06, "loss": 0.5594, "step": 10726, "task_loss": 0.7274681925773621 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7334962487220764, "epoch": 9.07, "learning_rate": 5.179862872170565e-06, "loss": 0.7331, "step": 10727, "task_loss": 1.028825283050537 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.36315786838531494, "epoch": 9.07, "learning_rate": 5.175166713628253e-06, "loss": 0.6233, "step": 10728, "task_loss": 0.48624899983406067 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6651328802108765, "epoch": 9.07, "learning_rate": 5.17047055508594e-06, "loss": 0.6292, "step": 10729, "task_loss": 0.8246500492095947 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5990895628929138, "epoch": 9.07, "learning_rate": 5.165774396543627e-06, "loss": 0.5885, "step": 10730, "task_loss": 1.2375344038009644 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4557642340660095, "epoch": 9.07, "learning_rate": 5.161078238001315e-06, "loss": 0.4836, "step": 10731, "task_loss": 0.5624407529830933 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.25203099846839905, "epoch": 9.07, "learning_rate": 5.156382079459002e-06, "loss": 0.5009, "step": 10732, "task_loss": 0.17915724217891693 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7802634239196777, "epoch": 9.07, "learning_rate": 5.15168592091669e-06, "loss": 0.5887, "step": 10733, "task_loss": 0.898050844669342 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6418172121047974, "epoch": 9.07, "learning_rate": 5.146989762374378e-06, "loss": 0.5334, "step": 10734, "task_loss": 0.6073089838027954 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.2364717423915863, "epoch": 9.07, "learning_rate": 5.1422936038320655e-06, "loss": 0.355, "step": 10735, "task_loss": 0.016945166513323784 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7723769545555115, "epoch": 9.08, "learning_rate": 5.1375974452897536e-06, "loss": 0.5572, "step": 10736, "task_loss": 0.9320164918899536 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.34681376814842224, "epoch": 9.08, "learning_rate": 5.132901286747441e-06, "loss": 0.4558, "step": 10737, "task_loss": 0.9558616280555725 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.543147087097168, "epoch": 9.08, "learning_rate": 5.128205128205128e-06, "loss": 0.4375, "step": 10738, "task_loss": 1.7354140281677246 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.44907259941101074, "epoch": 9.08, "learning_rate": 5.123508969662816e-06, "loss": 0.6413, "step": 10739, "task_loss": 0.891435444355011 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4230945110321045, "epoch": 9.08, "learning_rate": 5.118812811120504e-06, "loss": 0.4144, "step": 10740, "task_loss": 0.45406728982925415 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4339078664779663, "epoch": 9.08, "learning_rate": 5.114116652578191e-06, "loss": 0.6375, "step": 10741, "task_loss": 0.813127875328064 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.31925180554389954, "epoch": 9.08, "learning_rate": 5.109420494035879e-06, "loss": 0.4648, "step": 10742, "task_loss": 0.7864236235618591 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.3511282801628113, "epoch": 9.08, "learning_rate": 5.104724335493566e-06, "loss": 0.7055, "step": 10743, "task_loss": 0.30674657225608826 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5844024419784546, "epoch": 9.08, "learning_rate": 5.1000281769512544e-06, "loss": 0.5513, "step": 10744, "task_loss": 0.8306488990783691 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.9641013145446777, "epoch": 9.08, "learning_rate": 5.095332018408942e-06, "loss": 0.6464, "step": 10745, "task_loss": 0.8551418781280518 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.2794591188430786, "epoch": 9.08, "learning_rate": 5.090635859866629e-06, "loss": 0.4101, "step": 10746, "task_loss": 0.3487391769886017 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4486106038093567, "epoch": 9.08, "learning_rate": 5.085939701324317e-06, "loss": 0.4133, "step": 10747, "task_loss": 1.3279588222503662 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6490987539291382, "epoch": 9.09, "learning_rate": 5.081243542782005e-06, "loss": 0.5962, "step": 10748, "task_loss": 0.41047894954681396 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.3758861720561981, "epoch": 9.09, "learning_rate": 5.076547384239692e-06, "loss": 0.4599, "step": 10749, "task_loss": 0.7316484451293945 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.33905941247940063, "epoch": 9.09, "learning_rate": 5.07185122569738e-06, "loss": 0.4225, "step": 10750, "task_loss": 0.2673392593860626 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.34627819061279297, "epoch": 9.09, "learning_rate": 5.067155067155067e-06, "loss": 0.6528, "step": 10751, "task_loss": 0.5335155725479126 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5842825174331665, "epoch": 9.09, "learning_rate": 5.062458908612755e-06, "loss": 0.51, "step": 10752, "task_loss": 0.533616840839386 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5547326803207397, "epoch": 9.09, "learning_rate": 5.0577627500704425e-06, "loss": 0.4768, "step": 10753, "task_loss": 0.2990473508834839 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6757566928863525, "epoch": 9.09, "learning_rate": 5.05306659152813e-06, "loss": 0.4764, "step": 10754, "task_loss": 0.9881812334060669 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.19561654329299927, "epoch": 9.09, "learning_rate": 5.048370432985818e-06, "loss": 0.5194, "step": 10755, "task_loss": 0.40456876158714294 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4779481291770935, "epoch": 9.09, "learning_rate": 5.043674274443506e-06, "loss": 0.5103, "step": 10756, "task_loss": 0.33315205574035645 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.34403663873672485, "epoch": 9.09, "learning_rate": 5.038978115901193e-06, "loss": 0.4056, "step": 10757, "task_loss": 0.36416274309158325 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.39299169182777405, "epoch": 9.09, "learning_rate": 5.034281957358881e-06, "loss": 0.5137, "step": 10758, "task_loss": 0.9803383946418762 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.39156779646873474, "epoch": 9.09, "learning_rate": 5.029585798816568e-06, "loss": 0.5393, "step": 10759, "task_loss": 0.7420110106468201 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4752480089664459, "epoch": 9.1, "learning_rate": 5.024889640274256e-06, "loss": 0.5198, "step": 10760, "task_loss": 0.03501179814338684 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.44035688042640686, "epoch": 9.1, "learning_rate": 5.020193481731943e-06, "loss": 0.6122, "step": 10761, "task_loss": 0.19357067346572876 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4493807852268219, "epoch": 9.1, "learning_rate": 5.0154973231896306e-06, "loss": 0.4867, "step": 10762, "task_loss": 0.7728389501571655 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.31514665484428406, "epoch": 9.1, "learning_rate": 5.0108011646473195e-06, "loss": 0.4769, "step": 10763, "task_loss": 0.6905890703201294 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.49011969566345215, "epoch": 9.1, "learning_rate": 5.006105006105007e-06, "loss": 0.4738, "step": 10764, "task_loss": 0.30710792541503906 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.45383813977241516, "epoch": 9.1, "learning_rate": 5.001408847562694e-06, "loss": 0.7214, "step": 10765, "task_loss": 1.0173850059509277 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4809972047805786, "epoch": 9.1, "learning_rate": 4.996712689020382e-06, "loss": 0.6575, "step": 10766, "task_loss": 0.5342851281166077 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5289740562438965, "epoch": 9.1, "learning_rate": 4.992016530478069e-06, "loss": 0.5506, "step": 10767, "task_loss": 0.7263054251670837 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.670602023601532, "epoch": 9.1, "learning_rate": 4.987320371935757e-06, "loss": 0.5047, "step": 10768, "task_loss": 0.40711352229118347 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7047711610794067, "epoch": 9.1, "learning_rate": 4.982624213393444e-06, "loss": 0.5557, "step": 10769, "task_loss": 0.7503254413604736 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.34603285789489746, "epoch": 9.1, "learning_rate": 4.9779280548511315e-06, "loss": 0.4481, "step": 10770, "task_loss": 0.1292446106672287 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.37457287311553955, "epoch": 9.1, "learning_rate": 4.97323189630882e-06, "loss": 0.476, "step": 10771, "task_loss": 0.40658149123191833 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4471898674964905, "epoch": 9.11, "learning_rate": 4.9685357377665075e-06, "loss": 0.3963, "step": 10772, "task_loss": 0.7775158286094666 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.662043571472168, "epoch": 9.11, "learning_rate": 4.963839579224195e-06, "loss": 0.5034, "step": 10773, "task_loss": 1.457747220993042 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7967023253440857, "epoch": 9.11, "learning_rate": 4.959143420681883e-06, "loss": 0.6398, "step": 10774, "task_loss": 0.7316052913665771 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5361703038215637, "epoch": 9.11, "learning_rate": 4.95444726213957e-06, "loss": 0.8014, "step": 10775, "task_loss": 0.8551503419876099 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.9573431611061096, "epoch": 9.11, "learning_rate": 4.949751103597258e-06, "loss": 0.5834, "step": 10776, "task_loss": 0.6662958264350891 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.3358168601989746, "epoch": 9.11, "learning_rate": 4.945054945054945e-06, "loss": 0.734, "step": 10777, "task_loss": 1.679290771484375 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.42570048570632935, "epoch": 9.11, "learning_rate": 4.940358786512632e-06, "loss": 0.658, "step": 10778, "task_loss": 0.810636579990387 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6662504076957703, "epoch": 9.11, "learning_rate": 4.935662627970321e-06, "loss": 0.6606, "step": 10779, "task_loss": 0.9691312313079834 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4030231237411499, "epoch": 9.11, "learning_rate": 4.930966469428008e-06, "loss": 0.5056, "step": 10780, "task_loss": 1.1635175943374634 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.44069385528564453, "epoch": 9.11, "learning_rate": 4.926270310885696e-06, "loss": 0.5268, "step": 10781, "task_loss": 0.9235860705375671 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6784002780914307, "epoch": 9.11, "learning_rate": 4.921574152343384e-06, "loss": 0.4624, "step": 10782, "task_loss": 1.0753692388534546 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5162583589553833, "epoch": 9.11, "learning_rate": 4.916877993801071e-06, "loss": 0.4642, "step": 10783, "task_loss": 0.7312564849853516 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6078613996505737, "epoch": 9.12, "learning_rate": 4.912181835258758e-06, "loss": 0.4801, "step": 10784, "task_loss": 0.34435930848121643 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6030712127685547, "epoch": 9.12, "learning_rate": 4.907485676716446e-06, "loss": 0.6043, "step": 10785, "task_loss": 1.0006641149520874 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.37262052297592163, "epoch": 9.12, "learning_rate": 4.902789518174133e-06, "loss": 0.4406, "step": 10786, "task_loss": 0.2642630934715271 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6639752388000488, "epoch": 9.12, "learning_rate": 4.898093359631821e-06, "loss": 0.5826, "step": 10787, "task_loss": 0.7475026249885559 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6616700291633606, "epoch": 9.12, "learning_rate": 4.893397201089509e-06, "loss": 0.5987, "step": 10788, "task_loss": 0.4118358790874481 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7351030111312866, "epoch": 9.12, "learning_rate": 4.8887010425471965e-06, "loss": 0.6436, "step": 10789, "task_loss": 0.9862117767333984 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4709773361682892, "epoch": 9.12, "learning_rate": 4.8840048840048845e-06, "loss": 0.4338, "step": 10790, "task_loss": 0.7355362772941589 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5232301354408264, "epoch": 9.12, "learning_rate": 4.879308725462572e-06, "loss": 0.4295, "step": 10791, "task_loss": 0.34780845046043396 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5388599038124084, "epoch": 9.12, "learning_rate": 4.874612566920259e-06, "loss": 0.5009, "step": 10792, "task_loss": 1.0823311805725098 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5338149070739746, "epoch": 9.12, "learning_rate": 4.869916408377947e-06, "loss": 0.5472, "step": 10793, "task_loss": 1.07578444480896 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4928658604621887, "epoch": 9.12, "learning_rate": 4.865220249835635e-06, "loss": 0.5866, "step": 10794, "task_loss": 0.4112739562988281 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.2564661502838135, "epoch": 9.13, "learning_rate": 4.860524091293322e-06, "loss": 0.4326, "step": 10795, "task_loss": 1.4365452527999878 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.35808420181274414, "epoch": 9.13, "learning_rate": 4.85582793275101e-06, "loss": 0.6037, "step": 10796, "task_loss": 0.5430862903594971 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6631271243095398, "epoch": 9.13, "learning_rate": 4.851131774208697e-06, "loss": 0.5459, "step": 10797, "task_loss": 0.49775949120521545 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5071085095405579, "epoch": 9.13, "learning_rate": 4.846435615666385e-06, "loss": 0.7329, "step": 10798, "task_loss": 0.7601740956306458 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.36447539925575256, "epoch": 9.13, "learning_rate": 4.841739457124073e-06, "loss": 0.4384, "step": 10799, "task_loss": 0.886769711971283 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.3594026267528534, "epoch": 9.13, "learning_rate": 4.83704329858176e-06, "loss": 0.5511, "step": 10800, "task_loss": 0.39929890632629395 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.46356400847435, "epoch": 9.13, "learning_rate": 4.832347140039448e-06, "loss": 0.4963, "step": 10801, "task_loss": 0.8840184807777405 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4519862234592438, "epoch": 9.13, "learning_rate": 4.827650981497136e-06, "loss": 0.5311, "step": 10802, "task_loss": 1.7646710872650146 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.2951430082321167, "epoch": 9.13, "learning_rate": 4.822954822954823e-06, "loss": 0.5345, "step": 10803, "task_loss": 0.7790025472640991 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5372967720031738, "epoch": 9.13, "learning_rate": 4.818258664412511e-06, "loss": 0.497, "step": 10804, "task_loss": 0.3373531401157379 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7428412437438965, "epoch": 9.13, "learning_rate": 4.813562505870198e-06, "loss": 0.6931, "step": 10805, "task_loss": 0.7620967626571655 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5248833894729614, "epoch": 9.13, "learning_rate": 4.808866347327886e-06, "loss": 0.6821, "step": 10806, "task_loss": 0.9320053458213806 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5163807272911072, "epoch": 9.14, "learning_rate": 4.8041701887855735e-06, "loss": 0.4376, "step": 10807, "task_loss": 0.537032961845398 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.584462583065033, "epoch": 9.14, "learning_rate": 4.799474030243261e-06, "loss": 0.632, "step": 10808, "task_loss": 0.5542635321617126 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4270203709602356, "epoch": 9.14, "learning_rate": 4.794777871700949e-06, "loss": 0.4892, "step": 10809, "task_loss": 0.41152963042259216 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6766055822372437, "epoch": 9.14, "learning_rate": 4.790081713158637e-06, "loss": 0.6922, "step": 10810, "task_loss": 1.0806504487991333 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.43792641162872314, "epoch": 9.14, "learning_rate": 4.785385554616324e-06, "loss": 0.529, "step": 10811, "task_loss": 0.8878472447395325 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.2180333435535431, "epoch": 9.14, "learning_rate": 4.780689396074012e-06, "loss": 0.4153, "step": 10812, "task_loss": 0.16479995846748352 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4482658803462982, "epoch": 9.14, "learning_rate": 4.775993237531699e-06, "loss": 0.4564, "step": 10813, "task_loss": 0.1626623123884201 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.504636824131012, "epoch": 9.14, "learning_rate": 4.771297078989387e-06, "loss": 0.4248, "step": 10814, "task_loss": 0.17351432144641876 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.548913300037384, "epoch": 9.14, "learning_rate": 4.766600920447074e-06, "loss": 0.4179, "step": 10815, "task_loss": 1.243747591972351 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5018069744110107, "epoch": 9.14, "learning_rate": 4.7619047619047615e-06, "loss": 0.4569, "step": 10816, "task_loss": 0.6563104391098022 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7317501306533813, "epoch": 9.14, "learning_rate": 4.75720860336245e-06, "loss": 0.5551, "step": 10817, "task_loss": 1.1124718189239502 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.42188531160354614, "epoch": 9.14, "learning_rate": 4.752512444820138e-06, "loss": 0.6695, "step": 10818, "task_loss": 0.5033867955207825 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.20971906185150146, "epoch": 9.15, "learning_rate": 4.747816286277825e-06, "loss": 0.4099, "step": 10819, "task_loss": 0.20005862414836884 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5090563297271729, "epoch": 9.15, "learning_rate": 4.743120127735513e-06, "loss": 0.7195, "step": 10820, "task_loss": 0.49531421065330505 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.40505844354629517, "epoch": 9.15, "learning_rate": 4.7384239691932e-06, "loss": 0.4571, "step": 10821, "task_loss": 0.26272881031036377 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5439514517784119, "epoch": 9.15, "learning_rate": 4.733727810650888e-06, "loss": 0.5338, "step": 10822, "task_loss": 0.5342209339141846 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.48501870036125183, "epoch": 9.15, "learning_rate": 4.729031652108575e-06, "loss": 0.5947, "step": 10823, "task_loss": 0.5919874906539917 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.508482038974762, "epoch": 9.15, "learning_rate": 4.724335493566262e-06, "loss": 0.3929, "step": 10824, "task_loss": 0.2973504662513733 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.9996296763420105, "epoch": 9.15, "learning_rate": 4.719639335023951e-06, "loss": 0.6826, "step": 10825, "task_loss": 0.7748823761940002 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.26406335830688477, "epoch": 9.15, "learning_rate": 4.7149431764816385e-06, "loss": 0.515, "step": 10826, "task_loss": 0.181888610124588 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6267328262329102, "epoch": 9.15, "learning_rate": 4.710247017939326e-06, "loss": 0.5668, "step": 10827, "task_loss": 1.2325279712677002 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.34023815393447876, "epoch": 9.15, "learning_rate": 4.705550859397014e-06, "loss": 0.5307, "step": 10828, "task_loss": 0.2356719970703125 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.48738422989845276, "epoch": 9.15, "learning_rate": 4.700854700854701e-06, "loss": 0.5421, "step": 10829, "task_loss": 0.19997376203536987 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7565172910690308, "epoch": 9.15, "learning_rate": 4.696158542312389e-06, "loss": 0.624, "step": 10830, "task_loss": 2.2238430976867676 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.26180344820022583, "epoch": 9.16, "learning_rate": 4.691462383770076e-06, "loss": 0.5469, "step": 10831, "task_loss": 0.5909162759780884 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.513268768787384, "epoch": 9.16, "learning_rate": 4.686766225227763e-06, "loss": 0.6554, "step": 10832, "task_loss": 0.7978126406669617 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5059584379196167, "epoch": 9.16, "learning_rate": 4.682070066685452e-06, "loss": 0.58, "step": 10833, "task_loss": 0.7370092868804932 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5216562151908875, "epoch": 9.16, "learning_rate": 4.677373908143139e-06, "loss": 0.4027, "step": 10834, "task_loss": 0.7828550338745117 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4096694886684418, "epoch": 9.16, "learning_rate": 4.6726777496008265e-06, "loss": 0.6327, "step": 10835, "task_loss": 0.46410971879959106 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.493735671043396, "epoch": 9.16, "learning_rate": 4.667981591058515e-06, "loss": 0.7325, "step": 10836, "task_loss": 1.030170202255249 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.9138497114181519, "epoch": 9.16, "learning_rate": 4.663285432516202e-06, "loss": 0.5628, "step": 10837, "task_loss": 1.2414358854293823 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.3063437342643738, "epoch": 9.16, "learning_rate": 4.65858927397389e-06, "loss": 0.6204, "step": 10838, "task_loss": 0.22206629812717438 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.8960245847702026, "epoch": 9.16, "learning_rate": 4.653893115431577e-06, "loss": 0.6959, "step": 10839, "task_loss": 0.5248209834098816 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.730666995048523, "epoch": 9.16, "learning_rate": 4.649196956889264e-06, "loss": 0.5553, "step": 10840, "task_loss": 0.4806661605834961 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.477531373500824, "epoch": 9.16, "learning_rate": 4.644500798346953e-06, "loss": 0.573, "step": 10841, "task_loss": 0.6099678874015808 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.2639722228050232, "epoch": 9.16, "learning_rate": 4.63980463980464e-06, "loss": 0.5589, "step": 10842, "task_loss": 0.35312169790267944 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.590694010257721, "epoch": 9.17, "learning_rate": 4.6351084812623274e-06, "loss": 0.513, "step": 10843, "task_loss": 1.1163192987442017 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5915704965591431, "epoch": 9.17, "learning_rate": 4.6304123227200155e-06, "loss": 0.7493, "step": 10844, "task_loss": 0.5995631814002991 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.28608807921409607, "epoch": 9.17, "learning_rate": 4.625716164177703e-06, "loss": 0.5645, "step": 10845, "task_loss": 0.155314639210701 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6432895660400391, "epoch": 9.17, "learning_rate": 4.621020005635391e-06, "loss": 0.5592, "step": 10846, "task_loss": 0.6715465784072876 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5430681705474854, "epoch": 9.17, "learning_rate": 4.616323847093078e-06, "loss": 0.4852, "step": 10847, "task_loss": 0.7355966567993164 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4202154576778412, "epoch": 9.17, "learning_rate": 4.611627688550766e-06, "loss": 0.3948, "step": 10848, "task_loss": 0.5021507143974304 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6153039932250977, "epoch": 9.17, "learning_rate": 4.606931530008454e-06, "loss": 0.5353, "step": 10849, "task_loss": 0.4856630861759186 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.48080381751060486, "epoch": 9.17, "learning_rate": 4.602235371466141e-06, "loss": 0.5159, "step": 10850, "task_loss": 0.945907473564148 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.47829657793045044, "epoch": 9.17, "learning_rate": 4.597539212923828e-06, "loss": 0.4869, "step": 10851, "task_loss": 0.8005138635635376 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5382614731788635, "epoch": 9.17, "learning_rate": 4.592843054381516e-06, "loss": 0.4734, "step": 10852, "task_loss": 0.7610917687416077 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6903530359268188, "epoch": 9.17, "learning_rate": 4.5881468958392035e-06, "loss": 0.7259, "step": 10853, "task_loss": 1.2119572162628174 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.9453556537628174, "epoch": 9.17, "learning_rate": 4.5834507372968916e-06, "loss": 0.6248, "step": 10854, "task_loss": 1.0381531715393066 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5619428157806396, "epoch": 9.18, "learning_rate": 4.578754578754579e-06, "loss": 0.6098, "step": 10855, "task_loss": 1.1913701295852661 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7246257662773132, "epoch": 9.18, "learning_rate": 4.574058420212267e-06, "loss": 0.5533, "step": 10856, "task_loss": 1.5235695838928223 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4764540493488312, "epoch": 9.18, "learning_rate": 4.569362261669955e-06, "loss": 0.5469, "step": 10857, "task_loss": 0.22802934050559998 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.797978401184082, "epoch": 9.18, "learning_rate": 4.564666103127642e-06, "loss": 0.6318, "step": 10858, "task_loss": 0.49362775683403015 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.32643651962280273, "epoch": 9.18, "learning_rate": 4.559969944585329e-06, "loss": 0.4707, "step": 10859, "task_loss": 0.46889522671699524 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.25688478350639343, "epoch": 9.18, "learning_rate": 4.555273786043017e-06, "loss": 0.4528, "step": 10860, "task_loss": 0.4083934724330902 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4214138090610504, "epoch": 9.18, "learning_rate": 4.550577627500704e-06, "loss": 0.5602, "step": 10861, "task_loss": 0.40938860177993774 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4213910698890686, "epoch": 9.18, "learning_rate": 4.545881468958392e-06, "loss": 0.5055, "step": 10862, "task_loss": 0.16619780659675598 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.3822334408760071, "epoch": 9.18, "learning_rate": 4.54118531041608e-06, "loss": 0.3667, "step": 10863, "task_loss": 0.43113839626312256 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.3260955810546875, "epoch": 9.18, "learning_rate": 4.536489151873768e-06, "loss": 0.5029, "step": 10864, "task_loss": 1.480652093887329 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4976132810115814, "epoch": 9.18, "learning_rate": 4.531792993331455e-06, "loss": 0.4908, "step": 10865, "task_loss": 0.5951100587844849 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.31109315156936646, "epoch": 9.19, "learning_rate": 4.527096834789143e-06, "loss": 0.4729, "step": 10866, "task_loss": 1.3585652112960815 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.27515026926994324, "epoch": 9.19, "learning_rate": 4.52240067624683e-06, "loss": 0.4654, "step": 10867, "task_loss": 0.510993242263794 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5281930565834045, "epoch": 9.19, "learning_rate": 4.517704517704518e-06, "loss": 0.5031, "step": 10868, "task_loss": 0.548530638217926 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.679369330406189, "epoch": 9.19, "learning_rate": 4.513008359162205e-06, "loss": 0.5637, "step": 10869, "task_loss": 0.22896431386470795 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6705942153930664, "epoch": 9.19, "learning_rate": 4.5083122006198925e-06, "loss": 0.5419, "step": 10870, "task_loss": 0.7425059080123901 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6122167706489563, "epoch": 9.19, "learning_rate": 4.503616042077581e-06, "loss": 0.5378, "step": 10871, "task_loss": 1.1634297370910645 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6949007511138916, "epoch": 9.19, "learning_rate": 4.4989198835352685e-06, "loss": 0.7366, "step": 10872, "task_loss": 1.2006893157958984 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4023021459579468, "epoch": 9.19, "learning_rate": 4.494223724992956e-06, "loss": 0.5056, "step": 10873, "task_loss": 0.24474762380123138 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5639364719390869, "epoch": 9.19, "learning_rate": 4.489527566450644e-06, "loss": 0.6467, "step": 10874, "task_loss": 1.7069226503372192 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.3295495808124542, "epoch": 9.19, "learning_rate": 4.484831407908331e-06, "loss": 0.5429, "step": 10875, "task_loss": 0.46266359090805054 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.3693598210811615, "epoch": 9.19, "learning_rate": 4.480135249366019e-06, "loss": 0.6523, "step": 10876, "task_loss": 1.045897364616394 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7294018268585205, "epoch": 9.19, "learning_rate": 4.475439090823706e-06, "loss": 0.535, "step": 10877, "task_loss": 0.5471966862678528 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.36468178033828735, "epoch": 9.2, "learning_rate": 4.470742932281393e-06, "loss": 0.4697, "step": 10878, "task_loss": 0.08477520942687988 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7204464673995972, "epoch": 9.2, "learning_rate": 4.466046773739082e-06, "loss": 0.5768, "step": 10879, "task_loss": 1.8280061483383179 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5907106399536133, "epoch": 9.2, "learning_rate": 4.4613506151967694e-06, "loss": 0.6047, "step": 10880, "task_loss": 0.15427885949611664 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7866789102554321, "epoch": 9.2, "learning_rate": 4.456654456654457e-06, "loss": 0.5545, "step": 10881, "task_loss": 0.720936119556427 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.43583813309669495, "epoch": 9.2, "learning_rate": 4.451958298112145e-06, "loss": 0.5237, "step": 10882, "task_loss": 0.6013363003730774 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.48178181052207947, "epoch": 9.2, "learning_rate": 4.447262139569832e-06, "loss": 0.6936, "step": 10883, "task_loss": 0.6246921420097351 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.8088822960853577, "epoch": 9.2, "learning_rate": 4.44256598102752e-06, "loss": 0.6107, "step": 10884, "task_loss": 0.8761305212974548 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4401460886001587, "epoch": 9.2, "learning_rate": 4.437869822485207e-06, "loss": 0.5644, "step": 10885, "task_loss": 0.0860443264245987 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.34712502360343933, "epoch": 9.2, "learning_rate": 4.433173663942894e-06, "loss": 0.5432, "step": 10886, "task_loss": 0.4822004735469818 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.8429248929023743, "epoch": 9.2, "learning_rate": 4.428477505400583e-06, "loss": 0.5896, "step": 10887, "task_loss": 1.1455215215682983 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.47533881664276123, "epoch": 9.2, "learning_rate": 4.42378134685827e-06, "loss": 0.4811, "step": 10888, "task_loss": 1.444555401802063 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5070008635520935, "epoch": 9.2, "learning_rate": 4.4190851883159575e-06, "loss": 0.6044, "step": 10889, "task_loss": 0.40574362874031067 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5617731809616089, "epoch": 9.21, "learning_rate": 4.4143890297736455e-06, "loss": 0.5326, "step": 10890, "task_loss": 1.3129022121429443 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5047394037246704, "epoch": 9.21, "learning_rate": 4.409692871231333e-06, "loss": 0.5574, "step": 10891, "task_loss": 0.27070388197898865 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.3662519454956055, "epoch": 9.21, "learning_rate": 4.404996712689021e-06, "loss": 0.6919, "step": 10892, "task_loss": 0.7481173276901245 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6432501077651978, "epoch": 9.21, "learning_rate": 4.400300554146708e-06, "loss": 0.6158, "step": 10893, "task_loss": 0.874040961265564 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5450224876403809, "epoch": 9.21, "learning_rate": 4.395604395604396e-06, "loss": 0.4155, "step": 10894, "task_loss": 0.7032738327980042 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6036750078201294, "epoch": 9.21, "learning_rate": 4.390908237062084e-06, "loss": 0.5721, "step": 10895, "task_loss": 0.4670071601867676 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4691944420337677, "epoch": 9.21, "learning_rate": 4.386212078519771e-06, "loss": 0.6053, "step": 10896, "task_loss": 0.9119598865509033 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4413752257823944, "epoch": 9.21, "learning_rate": 4.381515919977458e-06, "loss": 0.4408, "step": 10897, "task_loss": 0.44342488050460815 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.42831915616989136, "epoch": 9.21, "learning_rate": 4.376819761435146e-06, "loss": 0.4601, "step": 10898, "task_loss": 0.37882789969444275 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5898022651672363, "epoch": 9.21, "learning_rate": 4.372123602892834e-06, "loss": 0.5215, "step": 10899, "task_loss": 0.8129600286483765 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5273644328117371, "epoch": 9.21, "learning_rate": 4.367427444350522e-06, "loss": 0.5033, "step": 10900, "task_loss": 0.8919691443443298 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5367013812065125, "epoch": 9.21, "learning_rate": 4.362731285808209e-06, "loss": 0.5728, "step": 10901, "task_loss": 0.8962957262992859 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4875826835632324, "epoch": 9.22, "learning_rate": 4.358035127265897e-06, "loss": 0.4642, "step": 10902, "task_loss": 0.5162282586097717 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4924187958240509, "epoch": 9.22, "learning_rate": 4.353338968723585e-06, "loss": 0.6292, "step": 10903, "task_loss": 0.3756810128688812 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4626406133174896, "epoch": 9.22, "learning_rate": 4.348642810181272e-06, "loss": 0.5774, "step": 10904, "task_loss": 0.5998739004135132 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4531788229942322, "epoch": 9.22, "learning_rate": 4.343946651638959e-06, "loss": 0.5687, "step": 10905, "task_loss": 0.6416094303131104 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6396664977073669, "epoch": 9.22, "learning_rate": 4.339250493096647e-06, "loss": 0.5488, "step": 10906, "task_loss": 0.9034744501113892 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5526031851768494, "epoch": 9.22, "learning_rate": 4.3345543345543345e-06, "loss": 0.6191, "step": 10907, "task_loss": 0.7673710584640503 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5250399112701416, "epoch": 9.22, "learning_rate": 4.3298581760120225e-06, "loss": 0.4553, "step": 10908, "task_loss": 0.49567198753356934 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6319273114204407, "epoch": 9.22, "learning_rate": 4.32516201746971e-06, "loss": 0.5483, "step": 10909, "task_loss": 0.9601724147796631 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5628266334533691, "epoch": 9.22, "learning_rate": 4.320465858927398e-06, "loss": 0.5726, "step": 10910, "task_loss": 0.9729996919631958 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5117441415786743, "epoch": 9.22, "learning_rate": 4.315769700385086e-06, "loss": 0.5154, "step": 10911, "task_loss": 1.0214203596115112 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.39926576614379883, "epoch": 9.22, "learning_rate": 4.311073541842773e-06, "loss": 0.5379, "step": 10912, "task_loss": 0.44812148809432983 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5475550293922424, "epoch": 9.22, "learning_rate": 4.30637738330046e-06, "loss": 0.6162, "step": 10913, "task_loss": 0.7654815316200256 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.46837982535362244, "epoch": 9.23, "learning_rate": 4.301681224758148e-06, "loss": 0.4933, "step": 10914, "task_loss": 1.0316585302352905 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.2505451440811157, "epoch": 9.23, "learning_rate": 4.296985066215835e-06, "loss": 0.3806, "step": 10915, "task_loss": 0.6235789656639099 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7343634366989136, "epoch": 9.23, "learning_rate": 4.292288907673523e-06, "loss": 0.5451, "step": 10916, "task_loss": 1.1542083024978638 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6989647150039673, "epoch": 9.23, "learning_rate": 4.287592749131211e-06, "loss": 0.6421, "step": 10917, "task_loss": 1.1470345258712769 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.41732776165008545, "epoch": 9.23, "learning_rate": 4.282896590588899e-06, "loss": 0.6963, "step": 10918, "task_loss": 0.34159597754478455 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4899551272392273, "epoch": 9.23, "learning_rate": 4.278200432046587e-06, "loss": 0.5992, "step": 10919, "task_loss": 0.8436910510063171 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5855897665023804, "epoch": 9.23, "learning_rate": 4.273504273504274e-06, "loss": 0.463, "step": 10920, "task_loss": 0.6768660545349121 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6877353191375732, "epoch": 9.23, "learning_rate": 4.268808114961961e-06, "loss": 0.5718, "step": 10921, "task_loss": 0.9600094556808472 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.3731490969657898, "epoch": 9.23, "learning_rate": 4.264111956419649e-06, "loss": 0.5394, "step": 10922, "task_loss": 0.47793394327163696 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.9520018100738525, "epoch": 9.23, "learning_rate": 4.259415797877336e-06, "loss": 0.745, "step": 10923, "task_loss": 1.4026192426681519 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.26013728976249695, "epoch": 9.23, "learning_rate": 4.254719639335024e-06, "loss": 0.4093, "step": 10924, "task_loss": 0.09092708677053452 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6342916488647461, "epoch": 9.23, "learning_rate": 4.250023480792712e-06, "loss": 0.4316, "step": 10925, "task_loss": 0.8332868218421936 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.522832989692688, "epoch": 9.24, "learning_rate": 4.2453273222503995e-06, "loss": 0.4659, "step": 10926, "task_loss": 0.5855490565299988 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.43368786573410034, "epoch": 9.24, "learning_rate": 4.2406311637080875e-06, "loss": 0.3576, "step": 10927, "task_loss": 0.07296571880578995 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4034872055053711, "epoch": 9.24, "learning_rate": 4.235935005165775e-06, "loss": 0.4917, "step": 10928, "task_loss": 0.5275158286094666 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.30411869287490845, "epoch": 9.24, "learning_rate": 4.231238846623462e-06, "loss": 0.3645, "step": 10929, "task_loss": 0.38052770495414734 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.3986460864543915, "epoch": 9.24, "learning_rate": 4.22654268808115e-06, "loss": 0.5812, "step": 10930, "task_loss": 0.5870955586433411 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5513052940368652, "epoch": 9.24, "learning_rate": 4.221846529538837e-06, "loss": 0.5743, "step": 10931, "task_loss": 1.5133676528930664 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7492802739143372, "epoch": 9.24, "learning_rate": 4.217150370996525e-06, "loss": 0.5667, "step": 10932, "task_loss": 1.339775800704956 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.568235456943512, "epoch": 9.24, "learning_rate": 4.212454212454213e-06, "loss": 0.4528, "step": 10933, "task_loss": 0.6803421378135681 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.3524261713027954, "epoch": 9.24, "learning_rate": 4.2077580539119e-06, "loss": 0.3354, "step": 10934, "task_loss": 0.4697146713733673 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4600464999675751, "epoch": 9.24, "learning_rate": 4.203061895369588e-06, "loss": 0.4703, "step": 10935, "task_loss": 0.592668890953064 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.9647623896598816, "epoch": 9.24, "learning_rate": 4.198365736827276e-06, "loss": 0.623, "step": 10936, "task_loss": 0.6262195110321045 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.8514769673347473, "epoch": 9.24, "learning_rate": 4.193669578284963e-06, "loss": 0.6031, "step": 10937, "task_loss": 0.7583869099617004 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.622306764125824, "epoch": 9.25, "learning_rate": 4.188973419742651e-06, "loss": 0.6106, "step": 10938, "task_loss": 0.41055312752723694 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7921923398971558, "epoch": 9.25, "learning_rate": 4.184277261200338e-06, "loss": 0.7328, "step": 10939, "task_loss": 1.082035779953003 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7467060089111328, "epoch": 9.25, "learning_rate": 4.179581102658025e-06, "loss": 0.5168, "step": 10940, "task_loss": 0.24246633052825928 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5491392016410828, "epoch": 9.25, "learning_rate": 4.174884944115714e-06, "loss": 0.558, "step": 10941, "task_loss": 1.1125648021697998 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4908609688282013, "epoch": 9.25, "learning_rate": 4.170188785573401e-06, "loss": 0.4992, "step": 10942, "task_loss": 0.6146554350852966 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4285265803337097, "epoch": 9.25, "learning_rate": 4.1654926270310885e-06, "loss": 0.5121, "step": 10943, "task_loss": 0.8556405305862427 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.49097099900245667, "epoch": 9.25, "learning_rate": 4.1607964684887765e-06, "loss": 0.4452, "step": 10944, "task_loss": 0.48642709851264954 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7617306709289551, "epoch": 9.25, "learning_rate": 4.156100309946464e-06, "loss": 0.543, "step": 10945, "task_loss": 1.170119285583496 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.8503702878952026, "epoch": 9.25, "learning_rate": 4.151404151404152e-06, "loss": 0.6929, "step": 10946, "task_loss": 1.1385785341262817 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5523126125335693, "epoch": 9.25, "learning_rate": 4.146707992861839e-06, "loss": 0.5942, "step": 10947, "task_loss": 0.7568174004554749 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.8795843720436096, "epoch": 9.25, "learning_rate": 4.142011834319527e-06, "loss": 0.6733, "step": 10948, "task_loss": 0.966051459312439 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7091909646987915, "epoch": 9.26, "learning_rate": 4.137315675777215e-06, "loss": 0.6119, "step": 10949, "task_loss": 1.6939926147460938 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.2957199215888977, "epoch": 9.26, "learning_rate": 4.132619517234902e-06, "loss": 0.5533, "step": 10950, "task_loss": 0.9509096741676331 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.37209826707839966, "epoch": 9.26, "learning_rate": 4.127923358692589e-06, "loss": 0.6491, "step": 10951, "task_loss": 0.7759472131729126 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.3615013659000397, "epoch": 9.26, "learning_rate": 4.123227200150277e-06, "loss": 0.4066, "step": 10952, "task_loss": 0.8642329573631287 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6423311829566956, "epoch": 9.26, "learning_rate": 4.1185310416079646e-06, "loss": 0.7173, "step": 10953, "task_loss": 0.858659565448761 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6531955003738403, "epoch": 9.26, "learning_rate": 4.113834883065653e-06, "loss": 0.5675, "step": 10954, "task_loss": 0.7577497363090515 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.3250599503517151, "epoch": 9.26, "learning_rate": 4.10913872452334e-06, "loss": 0.4185, "step": 10955, "task_loss": 0.8010559678077698 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.45157450437545776, "epoch": 9.26, "learning_rate": 4.104442565981028e-06, "loss": 0.4836, "step": 10956, "task_loss": 1.5399510860443115 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5517041683197021, "epoch": 9.26, "learning_rate": 4.099746407438716e-06, "loss": 0.6736, "step": 10957, "task_loss": 0.7331752777099609 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.38505929708480835, "epoch": 9.26, "learning_rate": 4.095050248896403e-06, "loss": 0.586, "step": 10958, "task_loss": 0.2563443183898926 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.8032345175743103, "epoch": 9.26, "learning_rate": 4.09035409035409e-06, "loss": 0.6244, "step": 10959, "task_loss": 0.26627886295318604 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.26758769154548645, "epoch": 9.26, "learning_rate": 4.085657931811778e-06, "loss": 0.4273, "step": 10960, "task_loss": 0.10821530967950821 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.3220205307006836, "epoch": 9.27, "learning_rate": 4.0809617732694654e-06, "loss": 0.4937, "step": 10961, "task_loss": 1.168175220489502 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.41894710063934326, "epoch": 9.27, "learning_rate": 4.0762656147271535e-06, "loss": 0.4654, "step": 10962, "task_loss": 0.25754567980766296 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5783056020736694, "epoch": 9.27, "learning_rate": 4.071569456184841e-06, "loss": 0.4733, "step": 10963, "task_loss": 0.5873746871948242 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5809851288795471, "epoch": 9.27, "learning_rate": 4.066873297642529e-06, "loss": 0.4767, "step": 10964, "task_loss": 0.693004310131073 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.33629119396209717, "epoch": 9.27, "learning_rate": 4.062177139100217e-06, "loss": 0.4741, "step": 10965, "task_loss": 0.526307225227356 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5074222683906555, "epoch": 9.27, "learning_rate": 4.057480980557904e-06, "loss": 0.5677, "step": 10966, "task_loss": 1.1005892753601074 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7590937614440918, "epoch": 9.27, "learning_rate": 4.052784822015591e-06, "loss": 0.5201, "step": 10967, "task_loss": 0.48172539472579956 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6252413392066956, "epoch": 9.27, "learning_rate": 4.048088663473279e-06, "loss": 0.6395, "step": 10968, "task_loss": 1.1368328332901 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5254808068275452, "epoch": 9.27, "learning_rate": 4.043392504930966e-06, "loss": 0.4691, "step": 10969, "task_loss": 0.893790066242218 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4146735668182373, "epoch": 9.27, "learning_rate": 4.038696346388654e-06, "loss": 0.456, "step": 10970, "task_loss": 0.5177074670791626 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.41818273067474365, "epoch": 9.27, "learning_rate": 4.034000187846342e-06, "loss": 0.4448, "step": 10971, "task_loss": 1.249911904335022 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.3412929177284241, "epoch": 9.27, "learning_rate": 4.0293040293040296e-06, "loss": 0.4102, "step": 10972, "task_loss": 0.1637885868549347 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.3158021867275238, "epoch": 9.28, "learning_rate": 4.024607870761718e-06, "loss": 0.4618, "step": 10973, "task_loss": 0.2861343026161194 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4541577100753784, "epoch": 9.28, "learning_rate": 4.019911712219405e-06, "loss": 0.5382, "step": 10974, "task_loss": 0.2660941183567047 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6278258562088013, "epoch": 9.28, "learning_rate": 4.015215553677092e-06, "loss": 0.5823, "step": 10975, "task_loss": 1.1680188179016113 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5896298289299011, "epoch": 9.28, "learning_rate": 4.01051939513478e-06, "loss": 0.5628, "step": 10976, "task_loss": 0.45311036705970764 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.3496105372905731, "epoch": 9.28, "learning_rate": 4.005823236592467e-06, "loss": 0.4949, "step": 10977, "task_loss": 0.44767728447914124 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4825911819934845, "epoch": 9.28, "learning_rate": 4.001127078050155e-06, "loss": 0.6344, "step": 10978, "task_loss": 0.7057150602340698 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.753537654876709, "epoch": 9.28, "learning_rate": 3.996430919507843e-06, "loss": 0.5862, "step": 10979, "task_loss": 0.869903028011322 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5526801943778992, "epoch": 9.28, "learning_rate": 3.9917347609655305e-06, "loss": 0.4746, "step": 10980, "task_loss": 0.7394396066665649 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.9483023881912231, "epoch": 9.28, "learning_rate": 3.9870386024232185e-06, "loss": 0.5849, "step": 10981, "task_loss": 2.007902145385742 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4774699807167053, "epoch": 9.28, "learning_rate": 3.982342443880906e-06, "loss": 0.5065, "step": 10982, "task_loss": 0.17856507003307343 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.8343600630760193, "epoch": 9.28, "learning_rate": 3.977646285338593e-06, "loss": 0.6481, "step": 10983, "task_loss": 1.2437623739242554 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.42253315448760986, "epoch": 9.28, "learning_rate": 3.972950126796281e-06, "loss": 0.4204, "step": 10984, "task_loss": 0.8215905427932739 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.3095882833003998, "epoch": 9.29, "learning_rate": 3.968253968253968e-06, "loss": 0.5732, "step": 10985, "task_loss": 0.13707387447357178 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6788414716720581, "epoch": 9.29, "learning_rate": 3.963557809711656e-06, "loss": 0.5556, "step": 10986, "task_loss": 0.6071580648422241 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.629853367805481, "epoch": 9.29, "learning_rate": 3.958861651169344e-06, "loss": 0.6, "step": 10987, "task_loss": 0.7008403539657593 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.3458211421966553, "epoch": 9.29, "learning_rate": 3.954165492627031e-06, "loss": 0.4421, "step": 10988, "task_loss": 0.810414731502533 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4081161618232727, "epoch": 9.29, "learning_rate": 3.949469334084719e-06, "loss": 0.5676, "step": 10989, "task_loss": 0.07090999186038971 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5614745020866394, "epoch": 9.29, "learning_rate": 3.9447731755424066e-06, "loss": 0.6604, "step": 10990, "task_loss": 0.7482591867446899 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5996106863021851, "epoch": 9.29, "learning_rate": 3.940077017000094e-06, "loss": 0.5584, "step": 10991, "task_loss": 1.13728928565979 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.3209647834300995, "epoch": 9.29, "learning_rate": 3.935380858457782e-06, "loss": 0.552, "step": 10992, "task_loss": 0.30690452456474304 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.8693583011627197, "epoch": 9.29, "learning_rate": 3.930684699915469e-06, "loss": 0.5777, "step": 10993, "task_loss": 1.0151220560073853 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.327233225107193, "epoch": 9.29, "learning_rate": 3.925988541373157e-06, "loss": 0.3927, "step": 10994, "task_loss": 0.67607182264328 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.9752513766288757, "epoch": 9.29, "learning_rate": 3.921292382830845e-06, "loss": 0.508, "step": 10995, "task_loss": 0.8663730621337891 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.3999868929386139, "epoch": 9.29, "learning_rate": 3.916596224288532e-06, "loss": 0.4652, "step": 10996, "task_loss": 0.5222601294517517 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5390021800994873, "epoch": 9.3, "learning_rate": 3.91190006574622e-06, "loss": 0.6144, "step": 10997, "task_loss": 0.6221882104873657 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7796196937561035, "epoch": 9.3, "learning_rate": 3.9072039072039074e-06, "loss": 0.5226, "step": 10998, "task_loss": 1.2386220693588257 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5757112503051758, "epoch": 9.3, "learning_rate": 3.902507748661595e-06, "loss": 0.4222, "step": 10999, "task_loss": 0.43675899505615234 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5463241338729858, "epoch": 9.3, "learning_rate": 3.897811590119283e-06, "loss": 0.5485, "step": 11000, "task_loss": 0.4274149239063263 }, { "epoch": 9.3, "eval_accuracy": 0.9038811881188119, "eval_loss": 0.35764050483703613, "eval_runtime": 225.2382, "eval_samples_per_second": 112.104, "eval_steps_per_second": 0.879, "step": 11000 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7140568494796753, "epoch": 9.3, "learning_rate": 3.89311543157697e-06, "loss": 0.7323, "step": 11001, "task_loss": 0.5698778629302979 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.43075352907180786, "epoch": 9.3, "learning_rate": 3.888419273034658e-06, "loss": 0.6062, "step": 11002, "task_loss": 1.105376124382019 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5407055616378784, "epoch": 9.3, "learning_rate": 3.883723114492346e-06, "loss": 0.5877, "step": 11003, "task_loss": 1.01487398147583 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7124694585800171, "epoch": 9.3, "learning_rate": 3.879026955950033e-06, "loss": 0.4999, "step": 11004, "task_loss": 1.0324440002441406 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6259205937385559, "epoch": 9.3, "learning_rate": 3.874330797407721e-06, "loss": 0.587, "step": 11005, "task_loss": 0.7234367728233337 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6852434873580933, "epoch": 9.3, "learning_rate": 3.869634638865408e-06, "loss": 0.6335, "step": 11006, "task_loss": 0.2126229703426361 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.1977427005767822, "epoch": 9.3, "learning_rate": 3.8649384803230955e-06, "loss": 0.8879, "step": 11007, "task_loss": 1.018966794013977 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4569547772407532, "epoch": 9.3, "learning_rate": 3.8602423217807835e-06, "loss": 0.3863, "step": 11008, "task_loss": 0.8507957458496094 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.2581004798412323, "epoch": 9.31, "learning_rate": 3.855546163238471e-06, "loss": 0.4681, "step": 11009, "task_loss": 0.3404015600681305 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4858361482620239, "epoch": 9.31, "learning_rate": 3.850850004696159e-06, "loss": 0.4896, "step": 11010, "task_loss": 0.5981464982032776 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.35521265864372253, "epoch": 9.31, "learning_rate": 3.846153846153847e-06, "loss": 0.5955, "step": 11011, "task_loss": 0.2629264295101166 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.8452341556549072, "epoch": 9.31, "learning_rate": 3.841457687611534e-06, "loss": 0.7454, "step": 11012, "task_loss": 0.5197286009788513 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.3735959231853485, "epoch": 9.31, "learning_rate": 3.836761529069222e-06, "loss": 0.4526, "step": 11013, "task_loss": 0.12611602246761322 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6044589281082153, "epoch": 9.31, "learning_rate": 3.832065370526909e-06, "loss": 0.5917, "step": 11014, "task_loss": 0.38324275612831116 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5282130241394043, "epoch": 9.31, "learning_rate": 3.827369211984596e-06, "loss": 0.4926, "step": 11015, "task_loss": 0.7896764278411865 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.572511613368988, "epoch": 9.31, "learning_rate": 3.822673053442284e-06, "loss": 0.5863, "step": 11016, "task_loss": 0.8338715434074402 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5187878608703613, "epoch": 9.31, "learning_rate": 3.817976894899972e-06, "loss": 0.5099, "step": 11017, "task_loss": 0.4736097455024719 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.48359987139701843, "epoch": 9.31, "learning_rate": 3.81328073635766e-06, "loss": 0.5478, "step": 11018, "task_loss": 1.1570225954055786 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.3049069344997406, "epoch": 9.31, "learning_rate": 3.8085845778153473e-06, "loss": 0.5006, "step": 11019, "task_loss": 0.4704192280769348 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5359009504318237, "epoch": 9.32, "learning_rate": 3.803888419273035e-06, "loss": 0.6086, "step": 11020, "task_loss": 0.6723226308822632 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6238926649093628, "epoch": 9.32, "learning_rate": 3.7991922607307225e-06, "loss": 0.5637, "step": 11021, "task_loss": 0.486113041639328 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4815084934234619, "epoch": 9.32, "learning_rate": 3.79449610218841e-06, "loss": 0.5461, "step": 11022, "task_loss": 0.7111560106277466 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.9258511662483215, "epoch": 9.32, "learning_rate": 3.7897999436460977e-06, "loss": 0.5372, "step": 11023, "task_loss": 0.5527610182762146 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.9421027898788452, "epoch": 9.32, "learning_rate": 3.785103785103785e-06, "loss": 0.6362, "step": 11024, "task_loss": 0.9712353944778442 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.9945833683013916, "epoch": 9.32, "learning_rate": 3.7804076265614733e-06, "loss": 0.5268, "step": 11025, "task_loss": 1.420912504196167 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.3107377886772156, "epoch": 9.32, "learning_rate": 3.775711468019161e-06, "loss": 0.5698, "step": 11026, "task_loss": 0.7533959746360779 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6495245695114136, "epoch": 9.32, "learning_rate": 3.771015309476848e-06, "loss": 0.4107, "step": 11027, "task_loss": 1.245865821838379 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.9657779932022095, "epoch": 9.32, "learning_rate": 3.7663191509345357e-06, "loss": 0.635, "step": 11028, "task_loss": 1.0642495155334473 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4579669237136841, "epoch": 9.32, "learning_rate": 3.7616229923922234e-06, "loss": 0.4954, "step": 11029, "task_loss": 1.1511746644973755 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6782408952713013, "epoch": 9.32, "learning_rate": 3.756926833849911e-06, "loss": 0.5067, "step": 11030, "task_loss": 0.6037449836730957 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.902891218662262, "epoch": 9.32, "learning_rate": 3.7522306753075986e-06, "loss": 0.5962, "step": 11031, "task_loss": 1.0377341508865356 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6460734605789185, "epoch": 9.33, "learning_rate": 3.7475345167652858e-06, "loss": 0.5144, "step": 11032, "task_loss": 0.9097421765327454 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.46765586733818054, "epoch": 9.33, "learning_rate": 3.7428383582229742e-06, "loss": 0.5772, "step": 11033, "task_loss": 0.4408780038356781 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6825538873672485, "epoch": 9.33, "learning_rate": 3.738142199680662e-06, "loss": 0.6653, "step": 11034, "task_loss": 0.528090238571167 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.572077214717865, "epoch": 9.33, "learning_rate": 3.733446041138349e-06, "loss": 0.5151, "step": 11035, "task_loss": 1.003177523612976 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.78909832239151, "epoch": 9.33, "learning_rate": 3.7287498825960366e-06, "loss": 0.6346, "step": 11036, "task_loss": 0.9628617763519287 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.27340149879455566, "epoch": 9.33, "learning_rate": 3.7240537240537242e-06, "loss": 0.4802, "step": 11037, "task_loss": 0.42176955938339233 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6290633082389832, "epoch": 9.33, "learning_rate": 3.719357565511412e-06, "loss": 0.5366, "step": 11038, "task_loss": 1.171726942062378 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6020877361297607, "epoch": 9.33, "learning_rate": 3.714661406969099e-06, "loss": 0.569, "step": 11039, "task_loss": 0.8706861138343811 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.603912353515625, "epoch": 9.33, "learning_rate": 3.7099652484267866e-06, "loss": 0.6198, "step": 11040, "task_loss": 0.7017185688018799 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.30134326219558716, "epoch": 9.33, "learning_rate": 3.705269089884475e-06, "loss": 0.5152, "step": 11041, "task_loss": 0.7170421481132507 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5750234723091125, "epoch": 9.33, "learning_rate": 3.7005729313421623e-06, "loss": 0.5655, "step": 11042, "task_loss": 0.5489616990089417 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.70073002576828, "epoch": 9.33, "learning_rate": 3.69587677279985e-06, "loss": 0.6977, "step": 11043, "task_loss": 1.563845157623291 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4483509659767151, "epoch": 9.34, "learning_rate": 3.6911806142575375e-06, "loss": 0.6021, "step": 11044, "task_loss": 0.8907885551452637 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5995936989784241, "epoch": 9.34, "learning_rate": 3.686484455715225e-06, "loss": 0.5584, "step": 11045, "task_loss": 1.0019899606704712 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6675196290016174, "epoch": 9.34, "learning_rate": 3.6817882971729127e-06, "loss": 0.5926, "step": 11046, "task_loss": 0.529183566570282 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6787434816360474, "epoch": 9.34, "learning_rate": 3.6770921386306e-06, "loss": 0.6068, "step": 11047, "task_loss": 0.3641342520713806 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.3751561641693115, "epoch": 9.34, "learning_rate": 3.6723959800882875e-06, "loss": 0.5904, "step": 11048, "task_loss": 0.34313997626304626 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.500418484210968, "epoch": 9.34, "learning_rate": 3.667699821545976e-06, "loss": 0.5748, "step": 11049, "task_loss": 0.19498911499977112 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.34913453459739685, "epoch": 9.34, "learning_rate": 3.663003663003663e-06, "loss": 0.4181, "step": 11050, "task_loss": 0.5265745520591736 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7342382073402405, "epoch": 9.34, "learning_rate": 3.6583075044613508e-06, "loss": 0.5721, "step": 11051, "task_loss": 0.22351816296577454 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.39087793231010437, "epoch": 9.34, "learning_rate": 3.6536113459190384e-06, "loss": 0.5917, "step": 11052, "task_loss": 0.6375265121459961 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.644103467464447, "epoch": 9.34, "learning_rate": 3.648915187376726e-06, "loss": 0.4524, "step": 11053, "task_loss": 0.6648716330528259 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5738146901130676, "epoch": 9.34, "learning_rate": 3.6442190288344136e-06, "loss": 0.5266, "step": 11054, "task_loss": 0.42964690923690796 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6777070164680481, "epoch": 9.34, "learning_rate": 3.639522870292101e-06, "loss": 0.5765, "step": 11055, "task_loss": 0.7385520935058594 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5895520448684692, "epoch": 9.35, "learning_rate": 3.6348267117497893e-06, "loss": 0.6798, "step": 11056, "task_loss": 1.3323516845703125 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5305094718933105, "epoch": 9.35, "learning_rate": 3.630130553207477e-06, "loss": 0.5703, "step": 11057, "task_loss": 0.7368490099906921 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.36000561714172363, "epoch": 9.35, "learning_rate": 3.625434394665164e-06, "loss": 0.4451, "step": 11058, "task_loss": 0.3638128638267517 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.3327975869178772, "epoch": 9.35, "learning_rate": 3.6207382361228517e-06, "loss": 0.4397, "step": 11059, "task_loss": 1.5285719633102417 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.3917226791381836, "epoch": 9.35, "learning_rate": 3.6160420775805393e-06, "loss": 0.7669, "step": 11060, "task_loss": 0.44794198870658875 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.641332745552063, "epoch": 9.35, "learning_rate": 3.611345919038227e-06, "loss": 0.5377, "step": 11061, "task_loss": 0.5768241882324219 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.39325499534606934, "epoch": 9.35, "learning_rate": 3.6066497604959145e-06, "loss": 0.4892, "step": 11062, "task_loss": 0.3297574818134308 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6134918928146362, "epoch": 9.35, "learning_rate": 3.6019536019536017e-06, "loss": 0.5342, "step": 11063, "task_loss": 1.0496389865875244 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5329225063323975, "epoch": 9.35, "learning_rate": 3.59725744341129e-06, "loss": 0.4608, "step": 11064, "task_loss": 0.7007267475128174 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.3505586087703705, "epoch": 9.35, "learning_rate": 3.5925612848689777e-06, "loss": 0.5724, "step": 11065, "task_loss": 0.1864093393087387 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7310865521430969, "epoch": 9.35, "learning_rate": 3.587865126326665e-06, "loss": 0.598, "step": 11066, "task_loss": 0.6960495710372925 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7150852084159851, "epoch": 9.35, "learning_rate": 3.5831689677843525e-06, "loss": 0.4768, "step": 11067, "task_loss": 0.952617347240448 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.762128472328186, "epoch": 9.36, "learning_rate": 3.57847280924204e-06, "loss": 0.6121, "step": 11068, "task_loss": 1.0737755298614502 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.41830921173095703, "epoch": 9.36, "learning_rate": 3.5737766506997278e-06, "loss": 0.4232, "step": 11069, "task_loss": 0.4652903974056244 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.3023297190666199, "epoch": 9.36, "learning_rate": 3.569080492157415e-06, "loss": 0.3323, "step": 11070, "task_loss": 0.4165792167186737 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.650453507900238, "epoch": 9.36, "learning_rate": 3.5643843336151026e-06, "loss": 0.5822, "step": 11071, "task_loss": 0.8610770106315613 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6708232164382935, "epoch": 9.36, "learning_rate": 3.559688175072791e-06, "loss": 0.5388, "step": 11072, "task_loss": 0.8451786637306213 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.8068320155143738, "epoch": 9.36, "learning_rate": 3.5549920165304786e-06, "loss": 0.6151, "step": 11073, "task_loss": 0.9385790824890137 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4990968108177185, "epoch": 9.36, "learning_rate": 3.550295857988166e-06, "loss": 0.6511, "step": 11074, "task_loss": 0.3814873695373535 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.47068220376968384, "epoch": 9.36, "learning_rate": 3.5455996994458534e-06, "loss": 0.4634, "step": 11075, "task_loss": 0.33107253909111023 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6775467991828918, "epoch": 9.36, "learning_rate": 3.540903540903541e-06, "loss": 0.5738, "step": 11076, "task_loss": 0.8753811717033386 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.422080934047699, "epoch": 9.36, "learning_rate": 3.5362073823612286e-06, "loss": 0.437, "step": 11077, "task_loss": 0.2625013291835785 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.0858197212219238, "epoch": 9.36, "learning_rate": 3.531511223818916e-06, "loss": 0.5868, "step": 11078, "task_loss": 0.7094920873641968 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.25350257754325867, "epoch": 9.36, "learning_rate": 3.5268150652766043e-06, "loss": 0.4084, "step": 11079, "task_loss": 0.1762065589427948 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7187150716781616, "epoch": 9.37, "learning_rate": 3.522118906734292e-06, "loss": 0.6457, "step": 11080, "task_loss": 1.1745599508285522 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.3787701725959778, "epoch": 9.37, "learning_rate": 3.517422748191979e-06, "loss": 0.4752, "step": 11081, "task_loss": 0.367812842130661 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6437379717826843, "epoch": 9.37, "learning_rate": 3.5127265896496667e-06, "loss": 0.7166, "step": 11082, "task_loss": 1.4012805223464966 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.48033130168914795, "epoch": 9.37, "learning_rate": 3.5080304311073543e-06, "loss": 0.5487, "step": 11083, "task_loss": 0.5171042084693909 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7722226977348328, "epoch": 9.37, "learning_rate": 3.503334272565042e-06, "loss": 0.7103, "step": 11084, "task_loss": 1.2335305213928223 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4388309717178345, "epoch": 9.37, "learning_rate": 3.4986381140227295e-06, "loss": 0.4219, "step": 11085, "task_loss": 0.924210786819458 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.33908790349960327, "epoch": 9.37, "learning_rate": 3.4939419554804167e-06, "loss": 0.6567, "step": 11086, "task_loss": 0.4795485734939575 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.29407113790512085, "epoch": 9.37, "learning_rate": 3.489245796938105e-06, "loss": 0.514, "step": 11087, "task_loss": 0.7161890864372253 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5133517980575562, "epoch": 9.37, "learning_rate": 3.4845496383957928e-06, "loss": 0.5356, "step": 11088, "task_loss": 1.0491986274719238 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.3746059834957123, "epoch": 9.37, "learning_rate": 3.47985347985348e-06, "loss": 0.6627, "step": 11089, "task_loss": 0.8216314315795898 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.45719343423843384, "epoch": 9.37, "learning_rate": 3.4751573213111676e-06, "loss": 0.5076, "step": 11090, "task_loss": 0.6946412920951843 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4834900498390198, "epoch": 9.38, "learning_rate": 3.470461162768855e-06, "loss": 0.601, "step": 11091, "task_loss": 1.1683692932128906 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.41766154766082764, "epoch": 9.38, "learning_rate": 3.465765004226543e-06, "loss": 0.5492, "step": 11092, "task_loss": 0.4272165894508362 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5390196442604065, "epoch": 9.38, "learning_rate": 3.4610688456842304e-06, "loss": 0.461, "step": 11093, "task_loss": 0.586580753326416 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5226321220397949, "epoch": 9.38, "learning_rate": 3.4563726871419176e-06, "loss": 0.4098, "step": 11094, "task_loss": 0.29370424151420593 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.3643988370895386, "epoch": 9.38, "learning_rate": 3.451676528599606e-06, "loss": 0.5589, "step": 11095, "task_loss": 1.444654941558838 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.3300498425960541, "epoch": 9.38, "learning_rate": 3.4469803700572937e-06, "loss": 0.4094, "step": 11096, "task_loss": 0.37557774782180786 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7911975979804993, "epoch": 9.38, "learning_rate": 3.442284211514981e-06, "loss": 0.4712, "step": 11097, "task_loss": 0.48998376727104187 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6135490536689758, "epoch": 9.38, "learning_rate": 3.4375880529726685e-06, "loss": 0.602, "step": 11098, "task_loss": 0.38733822107315063 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.32408007979393005, "epoch": 9.38, "learning_rate": 3.432891894430356e-06, "loss": 0.4412, "step": 11099, "task_loss": 0.664608359336853 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.29378288984298706, "epoch": 9.38, "learning_rate": 3.4281957358880437e-06, "loss": 0.5081, "step": 11100, "task_loss": 0.09347495436668396 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4468725323677063, "epoch": 9.38, "learning_rate": 3.4234995773457313e-06, "loss": 0.4573, "step": 11101, "task_loss": 1.1402078866958618 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.8350475430488586, "epoch": 9.38, "learning_rate": 3.4188034188034193e-06, "loss": 0.5234, "step": 11102, "task_loss": 0.6529058814048767 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6792205572128296, "epoch": 9.39, "learning_rate": 3.414107260261107e-06, "loss": 0.6285, "step": 11103, "task_loss": 0.9224597215652466 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.8460720777511597, "epoch": 9.39, "learning_rate": 3.4094111017187945e-06, "loss": 0.775, "step": 11104, "task_loss": 1.1045620441436768 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.3041406273841858, "epoch": 9.39, "learning_rate": 3.4047149431764817e-06, "loss": 0.4995, "step": 11105, "task_loss": 0.6274885535240173 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5086324214935303, "epoch": 9.39, "learning_rate": 3.4000187846341693e-06, "loss": 0.5194, "step": 11106, "task_loss": 1.2403982877731323 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6446537971496582, "epoch": 9.39, "learning_rate": 3.395322626091857e-06, "loss": 0.4995, "step": 11107, "task_loss": 1.1975113153457642 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6286758184432983, "epoch": 9.39, "learning_rate": 3.3906264675495446e-06, "loss": 0.6297, "step": 11108, "task_loss": 0.6870454549789429 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.3783208131790161, "epoch": 9.39, "learning_rate": 3.3859303090072318e-06, "loss": 0.4738, "step": 11109, "task_loss": 0.7632800936698914 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.9395403861999512, "epoch": 9.39, "learning_rate": 3.38123415046492e-06, "loss": 0.7192, "step": 11110, "task_loss": 0.7955145835876465 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5888002514839172, "epoch": 9.39, "learning_rate": 3.376537991922608e-06, "loss": 0.4269, "step": 11111, "task_loss": 1.1337482929229736 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.2262974977493286, "epoch": 9.39, "learning_rate": 3.3718418333802954e-06, "loss": 0.7263, "step": 11112, "task_loss": 1.088402271270752 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.27186477184295654, "epoch": 9.39, "learning_rate": 3.3671456748379826e-06, "loss": 0.4708, "step": 11113, "task_loss": 0.032991018146276474 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7451769113540649, "epoch": 9.39, "learning_rate": 3.3624495162956702e-06, "loss": 0.5645, "step": 11114, "task_loss": 1.7511882781982422 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.3677648901939392, "epoch": 9.4, "learning_rate": 3.357753357753358e-06, "loss": 0.4433, "step": 11115, "task_loss": 0.2919082045555115 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.23110246658325195, "epoch": 9.4, "learning_rate": 3.3530571992110454e-06, "loss": 0.4979, "step": 11116, "task_loss": 0.49213844537734985 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.44743162393569946, "epoch": 9.4, "learning_rate": 3.3483610406687326e-06, "loss": 0.4855, "step": 11117, "task_loss": 0.579852819442749 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6172415614128113, "epoch": 9.4, "learning_rate": 3.343664882126421e-06, "loss": 0.5107, "step": 11118, "task_loss": 0.899348795413971 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5544959306716919, "epoch": 9.4, "learning_rate": 3.3389687235841087e-06, "loss": 0.5974, "step": 11119, "task_loss": 0.2919134199619293 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.2875138223171234, "epoch": 9.4, "learning_rate": 3.334272565041796e-06, "loss": 0.5386, "step": 11120, "task_loss": 0.5029345154762268 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.46017682552337646, "epoch": 9.4, "learning_rate": 3.3295764064994835e-06, "loss": 0.3596, "step": 11121, "task_loss": 0.40966182947158813 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4596652388572693, "epoch": 9.4, "learning_rate": 3.324880247957171e-06, "loss": 0.5156, "step": 11122, "task_loss": 1.057257890701294 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.46278709173202515, "epoch": 9.4, "learning_rate": 3.3201840894148587e-06, "loss": 0.5099, "step": 11123, "task_loss": 0.43311333656311035 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.42865991592407227, "epoch": 9.4, "learning_rate": 3.3154879308725463e-06, "loss": 0.5467, "step": 11124, "task_loss": 0.2056272029876709 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5136969685554504, "epoch": 9.4, "learning_rate": 3.3107917723302335e-06, "loss": 0.5897, "step": 11125, "task_loss": 0.6179768443107605 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6414124965667725, "epoch": 9.4, "learning_rate": 3.306095613787922e-06, "loss": 0.7125, "step": 11126, "task_loss": 1.4045681953430176 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6912810206413269, "epoch": 9.41, "learning_rate": 3.3013994552456096e-06, "loss": 0.4868, "step": 11127, "task_loss": 0.502131462097168 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6321882009506226, "epoch": 9.41, "learning_rate": 3.2967032967032968e-06, "loss": 0.6458, "step": 11128, "task_loss": 0.8637170791625977 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.3912673592567444, "epoch": 9.41, "learning_rate": 3.2920071381609844e-06, "loss": 0.5482, "step": 11129, "task_loss": 0.05519155412912369 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.8124796152114868, "epoch": 9.41, "learning_rate": 3.287310979618672e-06, "loss": 0.6199, "step": 11130, "task_loss": 0.4740569293498993 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5340747833251953, "epoch": 9.41, "learning_rate": 3.2826148210763596e-06, "loss": 0.5758, "step": 11131, "task_loss": 1.6151121854782104 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.3482801914215088, "epoch": 9.41, "learning_rate": 3.277918662534047e-06, "loss": 0.5309, "step": 11132, "task_loss": 0.9393897652626038 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.33681902289390564, "epoch": 9.41, "learning_rate": 3.2732225039917352e-06, "loss": 0.3607, "step": 11133, "task_loss": 0.3562697172164917 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6691262722015381, "epoch": 9.41, "learning_rate": 3.268526345449423e-06, "loss": 0.5117, "step": 11134, "task_loss": 0.4653528034687042 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.39411160349845886, "epoch": 9.41, "learning_rate": 3.2638301869071105e-06, "loss": 0.5078, "step": 11135, "task_loss": 0.8771699666976929 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.2250085324048996, "epoch": 9.41, "learning_rate": 3.2591340283647977e-06, "loss": 0.5092, "step": 11136, "task_loss": 0.8824301958084106 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.39933061599731445, "epoch": 9.41, "learning_rate": 3.2544378698224853e-06, "loss": 0.519, "step": 11137, "task_loss": 0.5607483386993408 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5592647194862366, "epoch": 9.41, "learning_rate": 3.249741711280173e-06, "loss": 0.4436, "step": 11138, "task_loss": 0.7491999864578247 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4985281825065613, "epoch": 9.42, "learning_rate": 3.2450455527378605e-06, "loss": 0.6113, "step": 11139, "task_loss": 0.47277867794036865 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.41220033168792725, "epoch": 9.42, "learning_rate": 3.240349394195548e-06, "loss": 0.5791, "step": 11140, "task_loss": 1.0582389831542969 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.33387482166290283, "epoch": 9.42, "learning_rate": 3.235653235653236e-06, "loss": 0.5485, "step": 11141, "task_loss": 0.7039389610290527 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6312485337257385, "epoch": 9.42, "learning_rate": 3.2309570771109237e-06, "loss": 0.6898, "step": 11142, "task_loss": 0.5599284172058105 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5976980924606323, "epoch": 9.42, "learning_rate": 3.2262609185686113e-06, "loss": 0.6298, "step": 11143, "task_loss": 0.30890753865242004 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.295779287815094, "epoch": 9.42, "learning_rate": 3.2215647600262985e-06, "loss": 0.5604, "step": 11144, "task_loss": 0.5468045473098755 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5590115785598755, "epoch": 9.42, "learning_rate": 3.216868601483986e-06, "loss": 0.5013, "step": 11145, "task_loss": 0.49378541111946106 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5546669363975525, "epoch": 9.42, "learning_rate": 3.2121724429416738e-06, "loss": 0.6615, "step": 11146, "task_loss": 0.7564154267311096 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6785353422164917, "epoch": 9.42, "learning_rate": 3.2074762843993614e-06, "loss": 0.554, "step": 11147, "task_loss": 0.40302467346191406 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7306913733482361, "epoch": 9.42, "learning_rate": 3.2027801258570485e-06, "loss": 0.5921, "step": 11148, "task_loss": 0.6420811414718628 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.48053300380706787, "epoch": 9.42, "learning_rate": 3.198083967314737e-06, "loss": 0.5613, "step": 11149, "task_loss": 0.5979509353637695 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.668728232383728, "epoch": 9.42, "learning_rate": 3.1933878087724246e-06, "loss": 0.6341, "step": 11150, "task_loss": 1.4296324253082275 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.3514482378959656, "epoch": 9.43, "learning_rate": 3.1886916502301122e-06, "loss": 0.5504, "step": 11151, "task_loss": 0.8765593767166138 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.3960622549057007, "epoch": 9.43, "learning_rate": 3.1839954916877994e-06, "loss": 0.5836, "step": 11152, "task_loss": 0.6594244837760925 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.9185369610786438, "epoch": 9.43, "learning_rate": 3.179299333145487e-06, "loss": 0.6816, "step": 11153, "task_loss": 0.5660412311553955 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4780644178390503, "epoch": 9.43, "learning_rate": 3.1746031746031746e-06, "loss": 0.5219, "step": 11154, "task_loss": 0.5521951913833618 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.3349250853061676, "epoch": 9.43, "learning_rate": 3.1699070160608622e-06, "loss": 0.4432, "step": 11155, "task_loss": 0.407823383808136 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4655912518501282, "epoch": 9.43, "learning_rate": 3.1652108575185503e-06, "loss": 0.514, "step": 11156, "task_loss": 0.4712706208229065 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5052464604377747, "epoch": 9.43, "learning_rate": 3.160514698976238e-06, "loss": 0.5995, "step": 11157, "task_loss": 0.9926379323005676 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.731526255607605, "epoch": 9.43, "learning_rate": 3.1558185404339255e-06, "loss": 0.5824, "step": 11158, "task_loss": 0.7762466669082642 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.37927356362342834, "epoch": 9.43, "learning_rate": 3.1511223818916127e-06, "loss": 0.6127, "step": 11159, "task_loss": 0.8265427350997925 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.3676943778991699, "epoch": 9.43, "learning_rate": 3.1464262233493003e-06, "loss": 0.4114, "step": 11160, "task_loss": 0.25177186727523804 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.35482627153396606, "epoch": 9.43, "learning_rate": 3.141730064806988e-06, "loss": 0.4682, "step": 11161, "task_loss": 0.4318864941596985 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4356091022491455, "epoch": 9.44, "learning_rate": 3.1370339062646755e-06, "loss": 0.5349, "step": 11162, "task_loss": 0.6081743836402893 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7963196039199829, "epoch": 9.44, "learning_rate": 3.132337747722363e-06, "loss": 0.5166, "step": 11163, "task_loss": 1.1281956434249878 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4117113947868347, "epoch": 9.44, "learning_rate": 3.127641589180051e-06, "loss": 0.5554, "step": 11164, "task_loss": 0.8243807554244995 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.3230505585670471, "epoch": 9.44, "learning_rate": 3.1229454306377383e-06, "loss": 0.4225, "step": 11165, "task_loss": 0.9352537989616394 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.35018107295036316, "epoch": 9.44, "learning_rate": 3.1182492720954264e-06, "loss": 0.5008, "step": 11166, "task_loss": 1.1259442567825317 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5738670825958252, "epoch": 9.44, "learning_rate": 3.1135531135531136e-06, "loss": 0.4466, "step": 11167, "task_loss": 0.24371032416820526 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5985697507858276, "epoch": 9.44, "learning_rate": 3.108856955010801e-06, "loss": 0.5247, "step": 11168, "task_loss": 0.48119136691093445 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5480961799621582, "epoch": 9.44, "learning_rate": 3.1041607964684888e-06, "loss": 0.6098, "step": 11169, "task_loss": 1.2812097072601318 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5549170970916748, "epoch": 9.44, "learning_rate": 3.099464637926177e-06, "loss": 0.6009, "step": 11170, "task_loss": 0.3722454905509949 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5054013729095459, "epoch": 9.44, "learning_rate": 3.094768479383864e-06, "loss": 0.5076, "step": 11171, "task_loss": 1.0438300371170044 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.3445597290992737, "epoch": 9.44, "learning_rate": 3.0900723208415516e-06, "loss": 0.5084, "step": 11172, "task_loss": 0.5532470345497131 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.8288873434066772, "epoch": 9.44, "learning_rate": 3.0853761622992392e-06, "loss": 0.5571, "step": 11173, "task_loss": 1.179551362991333 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.3122905194759369, "epoch": 9.45, "learning_rate": 3.0806800037569273e-06, "loss": 0.6211, "step": 11174, "task_loss": 0.8374273777008057 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4194033741950989, "epoch": 9.45, "learning_rate": 3.0759838452146144e-06, "loss": 0.4235, "step": 11175, "task_loss": 0.7910700440406799 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5038051009178162, "epoch": 9.45, "learning_rate": 3.071287686672302e-06, "loss": 0.6776, "step": 11176, "task_loss": 0.6810777187347412 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.30673205852508545, "epoch": 9.45, "learning_rate": 3.0665915281299897e-06, "loss": 0.3794, "step": 11177, "task_loss": 0.2551667094230652 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.26858779788017273, "epoch": 9.45, "learning_rate": 3.0618953695876777e-06, "loss": 0.5452, "step": 11178, "task_loss": 0.5609457492828369 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6627442836761475, "epoch": 9.45, "learning_rate": 3.057199211045365e-06, "loss": 0.6484, "step": 11179, "task_loss": 0.4815748929977417 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5693459510803223, "epoch": 9.45, "learning_rate": 3.0525030525030525e-06, "loss": 0.693, "step": 11180, "task_loss": 0.14833194017410278 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.27992063760757446, "epoch": 9.45, "learning_rate": 3.0478068939607405e-06, "loss": 0.4317, "step": 11181, "task_loss": 0.9693751335144043 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6363602876663208, "epoch": 9.45, "learning_rate": 3.043110735418428e-06, "loss": 0.7418, "step": 11182, "task_loss": 1.4462608098983765 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4637390375137329, "epoch": 9.45, "learning_rate": 3.0384145768761153e-06, "loss": 0.5685, "step": 11183, "task_loss": 0.48982736468315125 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7309938073158264, "epoch": 9.45, "learning_rate": 3.033718418333803e-06, "loss": 0.6088, "step": 11184, "task_loss": 0.9512088894844055 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5189377069473267, "epoch": 9.45, "learning_rate": 3.029022259791491e-06, "loss": 0.5628, "step": 11185, "task_loss": 0.7732096910476685 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.13867291808128357, "epoch": 9.46, "learning_rate": 3.0243261012491786e-06, "loss": 0.3426, "step": 11186, "task_loss": 0.043049365282058716 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.8073858022689819, "epoch": 9.46, "learning_rate": 3.0196299427068658e-06, "loss": 0.5108, "step": 11187, "task_loss": 0.7730945944786072 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.3671865165233612, "epoch": 9.46, "learning_rate": 3.0149337841645534e-06, "loss": 0.5131, "step": 11188, "task_loss": 0.6448646187782288 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.48400965332984924, "epoch": 9.46, "learning_rate": 3.0102376256222414e-06, "loss": 0.5077, "step": 11189, "task_loss": 1.7643741369247437 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5058324337005615, "epoch": 9.46, "learning_rate": 3.0055414670799286e-06, "loss": 0.4617, "step": 11190, "task_loss": 1.250806212425232 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.38470160961151123, "epoch": 9.46, "learning_rate": 3.0008453085376162e-06, "loss": 0.4523, "step": 11191, "task_loss": 0.4217795729637146 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5489324331283569, "epoch": 9.46, "learning_rate": 2.996149149995304e-06, "loss": 0.6275, "step": 11192, "task_loss": 0.5095259547233582 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4338130056858063, "epoch": 9.46, "learning_rate": 2.991452991452992e-06, "loss": 0.5194, "step": 11193, "task_loss": 0.5813493132591248 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.39201757311820984, "epoch": 9.46, "learning_rate": 2.986756832910679e-06, "loss": 0.4791, "step": 11194, "task_loss": 1.7503974437713623 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4456171989440918, "epoch": 9.46, "learning_rate": 2.9820606743683667e-06, "loss": 0.4746, "step": 11195, "task_loss": 1.2226107120513916 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.39763957262039185, "epoch": 9.46, "learning_rate": 2.9773645158260543e-06, "loss": 0.4075, "step": 11196, "task_loss": 0.5088104009628296 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.42229801416397095, "epoch": 9.46, "learning_rate": 2.9726683572837423e-06, "loss": 0.4595, "step": 11197, "task_loss": 0.8628992438316345 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4571329951286316, "epoch": 9.47, "learning_rate": 2.9679721987414295e-06, "loss": 0.4993, "step": 11198, "task_loss": 0.5981459021568298 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5947855710983276, "epoch": 9.47, "learning_rate": 2.963276040199117e-06, "loss": 0.5615, "step": 11199, "task_loss": 0.8464884757995605 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4146347641944885, "epoch": 9.47, "learning_rate": 2.9585798816568047e-06, "loss": 0.553, "step": 11200, "task_loss": 0.535025954246521 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.797527015209198, "epoch": 9.47, "learning_rate": 2.9538837231144927e-06, "loss": 0.5237, "step": 11201, "task_loss": 0.6189848184585571 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.30893757939338684, "epoch": 9.47, "learning_rate": 2.94918756457218e-06, "loss": 0.4715, "step": 11202, "task_loss": 0.028159160166978836 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.49830928444862366, "epoch": 9.47, "learning_rate": 2.9444914060298675e-06, "loss": 0.5356, "step": 11203, "task_loss": 1.4245926141738892 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7145611047744751, "epoch": 9.47, "learning_rate": 2.939795247487555e-06, "loss": 0.5594, "step": 11204, "task_loss": 0.6180378198623657 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.17425081133842468, "epoch": 9.47, "learning_rate": 2.935099088945243e-06, "loss": 0.4694, "step": 11205, "task_loss": 0.06243056431412697 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.3144095838069916, "epoch": 9.47, "learning_rate": 2.9304029304029304e-06, "loss": 0.5122, "step": 11206, "task_loss": 0.11671505123376846 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4932126998901367, "epoch": 9.47, "learning_rate": 2.925706771860618e-06, "loss": 0.4326, "step": 11207, "task_loss": 0.2425106316804886 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.9185200333595276, "epoch": 9.47, "learning_rate": 2.921010613318306e-06, "loss": 0.6074, "step": 11208, "task_loss": 1.4237382411956787 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6256715059280396, "epoch": 9.47, "learning_rate": 2.9163144547759936e-06, "loss": 0.6569, "step": 11209, "task_loss": 1.1951192617416382 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.8609659075737, "epoch": 9.48, "learning_rate": 2.911618296233681e-06, "loss": 0.591, "step": 11210, "task_loss": 0.6378014087677002 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5100657939910889, "epoch": 9.48, "learning_rate": 2.9069221376913684e-06, "loss": 0.6524, "step": 11211, "task_loss": 0.2991871237754822 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.563478410243988, "epoch": 9.48, "learning_rate": 2.9022259791490565e-06, "loss": 0.5434, "step": 11212, "task_loss": 0.3929310142993927 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4076765477657318, "epoch": 9.48, "learning_rate": 2.897529820606744e-06, "loss": 0.4575, "step": 11213, "task_loss": 0.3963709771633148 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.29768604040145874, "epoch": 9.48, "learning_rate": 2.8928336620644312e-06, "loss": 0.3552, "step": 11214, "task_loss": 0.5723297595977783 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.2555665373802185, "epoch": 9.48, "learning_rate": 2.888137503522119e-06, "loss": 0.4123, "step": 11215, "task_loss": 0.22902387380599976 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5171318650245667, "epoch": 9.48, "learning_rate": 2.883441344979807e-06, "loss": 0.4955, "step": 11216, "task_loss": 0.9553980827331543 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4600731134414673, "epoch": 9.48, "learning_rate": 2.8787451864374945e-06, "loss": 0.6287, "step": 11217, "task_loss": 0.5804479122161865 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.414058655500412, "epoch": 9.48, "learning_rate": 2.8740490278951817e-06, "loss": 0.5244, "step": 11218, "task_loss": 0.20023566484451294 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.47961902618408203, "epoch": 9.48, "learning_rate": 2.8693528693528693e-06, "loss": 0.4695, "step": 11219, "task_loss": 0.9276801347732544 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5101318955421448, "epoch": 9.48, "learning_rate": 2.8646567108105573e-06, "loss": 0.4577, "step": 11220, "task_loss": 0.4558369517326355 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.699172854423523, "epoch": 9.48, "learning_rate": 2.859960552268245e-06, "loss": 0.6084, "step": 11221, "task_loss": 1.4894967079162598 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.3634474277496338, "epoch": 9.49, "learning_rate": 2.855264393725932e-06, "loss": 0.4718, "step": 11222, "task_loss": 0.5079518556594849 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5747938752174377, "epoch": 9.49, "learning_rate": 2.8505682351836197e-06, "loss": 0.5409, "step": 11223, "task_loss": 0.9425478577613831 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7430768013000488, "epoch": 9.49, "learning_rate": 2.8458720766413078e-06, "loss": 0.6543, "step": 11224, "task_loss": 1.360588788986206 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5270208120346069, "epoch": 9.49, "learning_rate": 2.8411759180989954e-06, "loss": 0.6836, "step": 11225, "task_loss": 0.9736447930335999 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6401659250259399, "epoch": 9.49, "learning_rate": 2.8364797595566826e-06, "loss": 0.4673, "step": 11226, "task_loss": 0.48319482803344727 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.693993330001831, "epoch": 9.49, "learning_rate": 2.83178360101437e-06, "loss": 0.4836, "step": 11227, "task_loss": 1.4331079721450806 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5109017491340637, "epoch": 9.49, "learning_rate": 2.8270874424720582e-06, "loss": 0.5796, "step": 11228, "task_loss": 0.126139834523201 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7835152745246887, "epoch": 9.49, "learning_rate": 2.8223912839297454e-06, "loss": 0.5748, "step": 11229, "task_loss": 0.43810319900512695 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4638989269733429, "epoch": 9.49, "learning_rate": 2.817695125387433e-06, "loss": 0.4891, "step": 11230, "task_loss": 0.5794159770011902 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5640575289726257, "epoch": 9.49, "learning_rate": 2.8129989668451206e-06, "loss": 0.4949, "step": 11231, "task_loss": 0.422950804233551 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6392498016357422, "epoch": 9.49, "learning_rate": 2.8083028083028087e-06, "loss": 0.4538, "step": 11232, "task_loss": 0.8638476729393005 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.8038769960403442, "epoch": 9.5, "learning_rate": 2.803606649760496e-06, "loss": 0.5792, "step": 11233, "task_loss": 1.003324270248413 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.3015412390232086, "epoch": 9.5, "learning_rate": 2.7989104912181835e-06, "loss": 0.4918, "step": 11234, "task_loss": 0.31251290440559387 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4549716114997864, "epoch": 9.5, "learning_rate": 2.7942143326758715e-06, "loss": 0.3199, "step": 11235, "task_loss": 0.7041872143745422 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6487836837768555, "epoch": 9.5, "learning_rate": 2.789518174133559e-06, "loss": 0.5095, "step": 11236, "task_loss": 0.5114790201187134 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5082249045372009, "epoch": 9.5, "learning_rate": 2.7848220155912463e-06, "loss": 0.4761, "step": 11237, "task_loss": 0.34324270486831665 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5402510762214661, "epoch": 9.5, "learning_rate": 2.780125857048934e-06, "loss": 0.4301, "step": 11238, "task_loss": 1.1007641553878784 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5648353099822998, "epoch": 9.5, "learning_rate": 2.775429698506622e-06, "loss": 0.5534, "step": 11239, "task_loss": 1.6882665157318115 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.724153995513916, "epoch": 9.5, "learning_rate": 2.7707335399643095e-06, "loss": 0.7064, "step": 11240, "task_loss": 0.3488437831401825 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4707251787185669, "epoch": 9.5, "learning_rate": 2.7660373814219967e-06, "loss": 0.5252, "step": 11241, "task_loss": 0.8056434988975525 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.3624878227710724, "epoch": 9.5, "learning_rate": 2.7613412228796843e-06, "loss": 0.5243, "step": 11242, "task_loss": 1.3109062910079956 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.46566590666770935, "epoch": 9.5, "learning_rate": 2.7566450643373724e-06, "loss": 0.6326, "step": 11243, "task_loss": 1.012470006942749 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.391556978225708, "epoch": 9.5, "learning_rate": 2.75194890579506e-06, "loss": 0.5736, "step": 11244, "task_loss": 0.32321351766586304 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.49198341369628906, "epoch": 9.51, "learning_rate": 2.747252747252747e-06, "loss": 0.4948, "step": 11245, "task_loss": 0.6251381039619446 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.3979555070400238, "epoch": 9.51, "learning_rate": 2.7425565887104348e-06, "loss": 0.5793, "step": 11246, "task_loss": 0.2804372310638428 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5097079873085022, "epoch": 9.51, "learning_rate": 2.737860430168123e-06, "loss": 0.5428, "step": 11247, "task_loss": 1.2022968530654907 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6925046443939209, "epoch": 9.51, "learning_rate": 2.7331642716258104e-06, "loss": 0.4705, "step": 11248, "task_loss": 0.45006683468818665 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.46897977590560913, "epoch": 9.51, "learning_rate": 2.7284681130834976e-06, "loss": 0.4792, "step": 11249, "task_loss": 0.30467385053634644 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6085516214370728, "epoch": 9.51, "learning_rate": 2.7237719545411852e-06, "loss": 0.5106, "step": 11250, "task_loss": 1.0562021732330322 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5249451398849487, "epoch": 9.51, "learning_rate": 2.7190757959988733e-06, "loss": 0.7262, "step": 11251, "task_loss": 0.4065154492855072 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5905717015266418, "epoch": 9.51, "learning_rate": 2.714379637456561e-06, "loss": 0.4682, "step": 11252, "task_loss": 0.44764643907546997 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4434654116630554, "epoch": 9.51, "learning_rate": 2.709683478914248e-06, "loss": 0.3969, "step": 11253, "task_loss": 0.8883944749832153 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.9155362248420715, "epoch": 9.51, "learning_rate": 2.7049873203719357e-06, "loss": 0.7403, "step": 11254, "task_loss": 1.9760304689407349 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.2908838391304016, "epoch": 9.51, "learning_rate": 2.7002911618296237e-06, "loss": 0.4126, "step": 11255, "task_loss": 0.2298804372549057 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.3395985960960388, "epoch": 9.51, "learning_rate": 2.6955950032873113e-06, "loss": 0.4989, "step": 11256, "task_loss": 0.6705290079116821 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6614788174629211, "epoch": 9.52, "learning_rate": 2.6908988447449985e-06, "loss": 0.4866, "step": 11257, "task_loss": 0.5600391626358032 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4609816074371338, "epoch": 9.52, "learning_rate": 2.6862026862026865e-06, "loss": 0.5995, "step": 11258, "task_loss": 0.5131989121437073 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.3335059583187103, "epoch": 9.52, "learning_rate": 2.681506527660374e-06, "loss": 0.4402, "step": 11259, "task_loss": 0.5076111555099487 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4667609930038452, "epoch": 9.52, "learning_rate": 2.6768103691180617e-06, "loss": 0.459, "step": 11260, "task_loss": 0.5480345487594604 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5583664178848267, "epoch": 9.52, "learning_rate": 2.672114210575749e-06, "loss": 0.5342, "step": 11261, "task_loss": 0.49604201316833496 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5955600142478943, "epoch": 9.52, "learning_rate": 2.667418052033437e-06, "loss": 0.5301, "step": 11262, "task_loss": 0.6410098671913147 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.064489483833313, "epoch": 9.52, "learning_rate": 2.6627218934911246e-06, "loss": 0.595, "step": 11263, "task_loss": 0.5184519290924072 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4310872554779053, "epoch": 9.52, "learning_rate": 2.658025734948812e-06, "loss": 0.496, "step": 11264, "task_loss": 0.7246324419975281 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.3592475950717926, "epoch": 9.52, "learning_rate": 2.6533295764064994e-06, "loss": 0.452, "step": 11265, "task_loss": 0.1540917158126831 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6691977381706238, "epoch": 9.52, "learning_rate": 2.6486334178641874e-06, "loss": 0.6251, "step": 11266, "task_loss": 0.608206033706665 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7982854843139648, "epoch": 9.52, "learning_rate": 2.643937259321875e-06, "loss": 0.5489, "step": 11267, "task_loss": 1.2630400657653809 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5251617431640625, "epoch": 9.52, "learning_rate": 2.639241100779562e-06, "loss": 0.5587, "step": 11268, "task_loss": 0.5851114988327026 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6244267821311951, "epoch": 9.53, "learning_rate": 2.63454494223725e-06, "loss": 0.4604, "step": 11269, "task_loss": 0.6942358613014221 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.3187119662761688, "epoch": 9.53, "learning_rate": 2.629848783694938e-06, "loss": 0.5795, "step": 11270, "task_loss": 0.3793101906776428 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.47814345359802246, "epoch": 9.53, "learning_rate": 2.6251526251526255e-06, "loss": 0.5729, "step": 11271, "task_loss": 0.982180655002594 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.24039261043071747, "epoch": 9.53, "learning_rate": 2.6204564666103126e-06, "loss": 0.3678, "step": 11272, "task_loss": 0.14001110196113586 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6196792125701904, "epoch": 9.53, "learning_rate": 2.6157603080680003e-06, "loss": 0.5614, "step": 11273, "task_loss": 0.9769834280014038 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6720485687255859, "epoch": 9.53, "learning_rate": 2.6110641495256883e-06, "loss": 0.4556, "step": 11274, "task_loss": 0.5077792406082153 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4280302822589874, "epoch": 9.53, "learning_rate": 2.606367990983376e-06, "loss": 0.5717, "step": 11275, "task_loss": 0.4765695333480835 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.3548336625099182, "epoch": 9.53, "learning_rate": 2.601671832441063e-06, "loss": 0.4716, "step": 11276, "task_loss": 0.23736508190631866 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.2945220172405243, "epoch": 9.53, "learning_rate": 2.5969756738987507e-06, "loss": 0.4943, "step": 11277, "task_loss": 0.08198618143796921 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4968324303627014, "epoch": 9.53, "learning_rate": 2.5922795153564387e-06, "loss": 0.4399, "step": 11278, "task_loss": 0.7858983874320984 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.3508654534816742, "epoch": 9.53, "learning_rate": 2.5875833568141263e-06, "loss": 0.4329, "step": 11279, "task_loss": 1.3209810256958008 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.31715792417526245, "epoch": 9.53, "learning_rate": 2.5828871982718135e-06, "loss": 0.4148, "step": 11280, "task_loss": 0.45970574021339417 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6455689072608948, "epoch": 9.54, "learning_rate": 2.578191039729501e-06, "loss": 0.5056, "step": 11281, "task_loss": 2.1212663650512695 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6583182215690613, "epoch": 9.54, "learning_rate": 2.573494881187189e-06, "loss": 0.7417, "step": 11282, "task_loss": 0.46663668751716614 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7477010488510132, "epoch": 9.54, "learning_rate": 2.5687987226448768e-06, "loss": 0.6183, "step": 11283, "task_loss": 1.1823301315307617 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.591083824634552, "epoch": 9.54, "learning_rate": 2.564102564102564e-06, "loss": 0.4667, "step": 11284, "task_loss": 0.5827910900115967 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.41403722763061523, "epoch": 9.54, "learning_rate": 2.559406405560252e-06, "loss": 0.5062, "step": 11285, "task_loss": 1.3135507106781006 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5059385895729065, "epoch": 9.54, "learning_rate": 2.5547102470179396e-06, "loss": 0.5766, "step": 11286, "task_loss": 0.7129079699516296 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4497736096382141, "epoch": 9.54, "learning_rate": 2.5500140884756272e-06, "loss": 0.5952, "step": 11287, "task_loss": 0.3709377348423004 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.9262888431549072, "epoch": 9.54, "learning_rate": 2.5453179299333144e-06, "loss": 0.6886, "step": 11288, "task_loss": 1.3071322441101074 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4404759407043457, "epoch": 9.54, "learning_rate": 2.5406217713910024e-06, "loss": 0.4869, "step": 11289, "task_loss": 0.5400684475898743 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5740737915039062, "epoch": 9.54, "learning_rate": 2.53592561284869e-06, "loss": 0.6466, "step": 11290, "task_loss": 1.1421723365783691 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.3339034914970398, "epoch": 9.54, "learning_rate": 2.5312294543063777e-06, "loss": 0.4779, "step": 11291, "task_loss": 0.5302637219429016 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.3346141278743744, "epoch": 9.54, "learning_rate": 2.526533295764065e-06, "loss": 0.4762, "step": 11292, "task_loss": 1.3013646602630615 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.065484881401062, "epoch": 9.55, "learning_rate": 2.521837137221753e-06, "loss": 0.6517, "step": 11293, "task_loss": 0.6578007340431213 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7507924437522888, "epoch": 9.55, "learning_rate": 2.5171409786794405e-06, "loss": 0.5953, "step": 11294, "task_loss": 0.45313555002212524 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.3916409909725189, "epoch": 9.55, "learning_rate": 2.512444820137128e-06, "loss": 0.4647, "step": 11295, "task_loss": 0.05582456290721893 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.91286301612854, "epoch": 9.55, "learning_rate": 2.5077486615948153e-06, "loss": 0.595, "step": 11296, "task_loss": 0.8451696634292603 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.29353976249694824, "epoch": 9.55, "learning_rate": 2.5030525030525033e-06, "loss": 0.484, "step": 11297, "task_loss": 0.8092113137245178 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4351484477519989, "epoch": 9.55, "learning_rate": 2.498356344510191e-06, "loss": 0.5125, "step": 11298, "task_loss": 0.7138547897338867 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5433275699615479, "epoch": 9.55, "learning_rate": 2.4936601859678785e-06, "loss": 0.4803, "step": 11299, "task_loss": 0.9670594334602356 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4451245069503784, "epoch": 9.55, "learning_rate": 2.4889640274255657e-06, "loss": 0.6376, "step": 11300, "task_loss": 0.4943684935569763 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6339473724365234, "epoch": 9.55, "learning_rate": 2.4842678688832538e-06, "loss": 0.6747, "step": 11301, "task_loss": 1.3400665521621704 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.3447936773300171, "epoch": 9.55, "learning_rate": 2.4795717103409414e-06, "loss": 0.5308, "step": 11302, "task_loss": 0.5028912425041199 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7479475736618042, "epoch": 9.55, "learning_rate": 2.474875551798629e-06, "loss": 0.4428, "step": 11303, "task_loss": 0.39573919773101807 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.43042832612991333, "epoch": 9.56, "learning_rate": 2.470179393256316e-06, "loss": 0.6145, "step": 11304, "task_loss": 0.48770764470100403 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6956771612167358, "epoch": 9.56, "learning_rate": 2.465483234714004e-06, "loss": 0.4791, "step": 11305, "task_loss": 0.5823579430580139 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.40375658869743347, "epoch": 9.56, "learning_rate": 2.460787076171692e-06, "loss": 0.5237, "step": 11306, "task_loss": 0.7839794158935547 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5448088645935059, "epoch": 9.56, "learning_rate": 2.456090917629379e-06, "loss": 0.4768, "step": 11307, "task_loss": 0.505507230758667 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.35148441791534424, "epoch": 9.56, "learning_rate": 2.4513947590870666e-06, "loss": 0.5431, "step": 11308, "task_loss": 0.5940706133842468 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6245949268341064, "epoch": 9.56, "learning_rate": 2.4466986005447546e-06, "loss": 0.5536, "step": 11309, "task_loss": 0.6330739259719849 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.27357620000839233, "epoch": 9.56, "learning_rate": 2.4420024420024423e-06, "loss": 0.4598, "step": 11310, "task_loss": 0.3091890513896942 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.37369900941848755, "epoch": 9.56, "learning_rate": 2.4373062834601294e-06, "loss": 0.5864, "step": 11311, "task_loss": 0.637654185295105 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.27571484446525574, "epoch": 9.56, "learning_rate": 2.4326101249178175e-06, "loss": 0.5353, "step": 11312, "task_loss": 0.8123959898948669 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5271096229553223, "epoch": 9.56, "learning_rate": 2.427913966375505e-06, "loss": 0.5632, "step": 11313, "task_loss": 0.6251791715621948 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4978424310684204, "epoch": 9.56, "learning_rate": 2.4232178078331927e-06, "loss": 0.5198, "step": 11314, "task_loss": 1.0911293029785156 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.47109702229499817, "epoch": 9.56, "learning_rate": 2.41852164929088e-06, "loss": 0.5242, "step": 11315, "task_loss": 0.7716821432113647 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.501494288444519, "epoch": 9.57, "learning_rate": 2.413825490748568e-06, "loss": 0.5089, "step": 11316, "task_loss": 0.7109674215316772 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.679937481880188, "epoch": 9.57, "learning_rate": 2.4091293322062555e-06, "loss": 0.4764, "step": 11317, "task_loss": 0.4361709654331207 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.35518068075180054, "epoch": 9.57, "learning_rate": 2.404433173663943e-06, "loss": 0.467, "step": 11318, "task_loss": 0.7475529909133911 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.568520188331604, "epoch": 9.57, "learning_rate": 2.3997370151216303e-06, "loss": 0.544, "step": 11319, "task_loss": 0.4720578193664551 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.3220349848270416, "epoch": 9.57, "learning_rate": 2.3950408565793184e-06, "loss": 0.4714, "step": 11320, "task_loss": 0.7138441801071167 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4054856598377228, "epoch": 9.57, "learning_rate": 2.390344698037006e-06, "loss": 0.5885, "step": 11321, "task_loss": 0.4209631383419037 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5548774600028992, "epoch": 9.57, "learning_rate": 2.3856485394946936e-06, "loss": 0.4313, "step": 11322, "task_loss": 0.3082820475101471 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4837498366832733, "epoch": 9.57, "learning_rate": 2.3809523809523808e-06, "loss": 0.4914, "step": 11323, "task_loss": 0.40177401900291443 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4989627003669739, "epoch": 9.57, "learning_rate": 2.376256222410069e-06, "loss": 0.8006, "step": 11324, "task_loss": 0.6497212648391724 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4094073176383972, "epoch": 9.57, "learning_rate": 2.3715600638677564e-06, "loss": 0.4625, "step": 11325, "task_loss": 0.4828144311904907 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.40828120708465576, "epoch": 9.57, "learning_rate": 2.366863905325444e-06, "loss": 0.5198, "step": 11326, "task_loss": 1.229005217552185 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.3773658573627472, "epoch": 9.57, "learning_rate": 2.362167746783131e-06, "loss": 0.4491, "step": 11327, "task_loss": 0.20502349734306335 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5077579617500305, "epoch": 9.58, "learning_rate": 2.3574715882408192e-06, "loss": 0.5436, "step": 11328, "task_loss": 0.8033504486083984 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7943429946899414, "epoch": 9.58, "learning_rate": 2.352775429698507e-06, "loss": 0.559, "step": 11329, "task_loss": 0.4793477952480316 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.3950050473213196, "epoch": 9.58, "learning_rate": 2.3480792711561945e-06, "loss": 0.4086, "step": 11330, "task_loss": 0.730297863483429 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4508638083934784, "epoch": 9.58, "learning_rate": 2.3433831126138816e-06, "loss": 0.3725, "step": 11331, "task_loss": 0.7669834494590759 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.416669636964798, "epoch": 9.58, "learning_rate": 2.3386869540715697e-06, "loss": 0.4423, "step": 11332, "task_loss": 0.4165184497833252 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6083411574363708, "epoch": 9.58, "learning_rate": 2.3339907955292573e-06, "loss": 0.5312, "step": 11333, "task_loss": 0.9701744318008423 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.48547062277793884, "epoch": 9.58, "learning_rate": 2.329294636986945e-06, "loss": 0.4196, "step": 11334, "task_loss": 0.09378504753112793 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6530546545982361, "epoch": 9.58, "learning_rate": 2.324598478444632e-06, "loss": 0.526, "step": 11335, "task_loss": 0.6071787476539612 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.44750842452049255, "epoch": 9.58, "learning_rate": 2.31990231990232e-06, "loss": 0.4795, "step": 11336, "task_loss": 0.4807664453983307 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4324527382850647, "epoch": 9.58, "learning_rate": 2.3152061613600077e-06, "loss": 0.498, "step": 11337, "task_loss": 0.028464380651712418 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.45919033885002136, "epoch": 9.58, "learning_rate": 2.3105100028176953e-06, "loss": 0.6393, "step": 11338, "task_loss": 1.1389330625534058 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.26782023906707764, "epoch": 9.58, "learning_rate": 2.305813844275383e-06, "loss": 0.3574, "step": 11339, "task_loss": 0.4858621060848236 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7754696607589722, "epoch": 9.59, "learning_rate": 2.3011176857330706e-06, "loss": 0.5754, "step": 11340, "task_loss": 0.22865431010723114 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6703416109085083, "epoch": 9.59, "learning_rate": 2.296421527190758e-06, "loss": 0.607, "step": 11341, "task_loss": 1.3635218143463135 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4295088052749634, "epoch": 9.59, "learning_rate": 2.2917253686484458e-06, "loss": 0.5457, "step": 11342, "task_loss": 0.8019813299179077 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.49722224473953247, "epoch": 9.59, "learning_rate": 2.2870292101061334e-06, "loss": 0.5803, "step": 11343, "task_loss": 0.5000154376029968 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6082929372787476, "epoch": 9.59, "learning_rate": 2.282333051563821e-06, "loss": 0.5924, "step": 11344, "task_loss": 1.2574076652526855 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.39489513635635376, "epoch": 9.59, "learning_rate": 2.2776368930215086e-06, "loss": 0.4818, "step": 11345, "task_loss": 0.6837270855903625 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.8449753522872925, "epoch": 9.59, "learning_rate": 2.272940734479196e-06, "loss": 0.709, "step": 11346, "task_loss": 1.0779989957809448 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.3333572447299957, "epoch": 9.59, "learning_rate": 2.268244575936884e-06, "loss": 0.5152, "step": 11347, "task_loss": 0.21496634185314178 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5039761662483215, "epoch": 9.59, "learning_rate": 2.2635484173945714e-06, "loss": 0.4809, "step": 11348, "task_loss": 1.1403034925460815 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.45210978388786316, "epoch": 9.59, "learning_rate": 2.258852258852259e-06, "loss": 0.5549, "step": 11349, "task_loss": 0.4723094403743744 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6134293079376221, "epoch": 9.59, "learning_rate": 2.2541561003099462e-06, "loss": 0.5576, "step": 11350, "task_loss": 0.9434386491775513 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.442374587059021, "epoch": 9.59, "learning_rate": 2.2494599417676343e-06, "loss": 0.6503, "step": 11351, "task_loss": 0.2529735267162323 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5978698134422302, "epoch": 9.6, "learning_rate": 2.244763783225322e-06, "loss": 0.5574, "step": 11352, "task_loss": 0.43565264344215393 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6101328730583191, "epoch": 9.6, "learning_rate": 2.2400676246830095e-06, "loss": 0.5711, "step": 11353, "task_loss": 0.9153672456741333 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7736935019493103, "epoch": 9.6, "learning_rate": 2.2353714661406967e-06, "loss": 0.5049, "step": 11354, "task_loss": 0.26655423641204834 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5841749906539917, "epoch": 9.6, "learning_rate": 2.2306753075983847e-06, "loss": 0.6524, "step": 11355, "task_loss": 1.1711784601211548 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.49926966428756714, "epoch": 9.6, "learning_rate": 2.2259791490560723e-06, "loss": 0.5932, "step": 11356, "task_loss": 1.5748735666275024 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.23277291655540466, "epoch": 9.6, "learning_rate": 2.22128299051376e-06, "loss": 0.4584, "step": 11357, "task_loss": 0.8467684984207153 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5206531286239624, "epoch": 9.6, "learning_rate": 2.216586831971447e-06, "loss": 0.4915, "step": 11358, "task_loss": 0.45865365862846375 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4101598858833313, "epoch": 9.6, "learning_rate": 2.211890673429135e-06, "loss": 0.4704, "step": 11359, "task_loss": 0.8500105142593384 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6126411557197571, "epoch": 9.6, "learning_rate": 2.2071945148868228e-06, "loss": 0.562, "step": 11360, "task_loss": 1.0517301559448242 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.50624680519104, "epoch": 9.6, "learning_rate": 2.2024983563445104e-06, "loss": 0.5617, "step": 11361, "task_loss": 0.7073647379875183 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.3042835295200348, "epoch": 9.6, "learning_rate": 2.197802197802198e-06, "loss": 0.3655, "step": 11362, "task_loss": 0.21476058661937714 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4579913914203644, "epoch": 9.6, "learning_rate": 2.1931060392598856e-06, "loss": 0.4399, "step": 11363, "task_loss": 0.38955485820770264 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.8641185760498047, "epoch": 9.61, "learning_rate": 2.188409880717573e-06, "loss": 0.6753, "step": 11364, "task_loss": 0.8704508543014526 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5644605755805969, "epoch": 9.61, "learning_rate": 2.183713722175261e-06, "loss": 0.4579, "step": 11365, "task_loss": 0.7010282278060913 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.8767354488372803, "epoch": 9.61, "learning_rate": 2.1790175636329484e-06, "loss": 0.6482, "step": 11366, "task_loss": 1.1051936149597168 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.41551673412323, "epoch": 9.61, "learning_rate": 2.174321405090636e-06, "loss": 0.3793, "step": 11367, "task_loss": 0.4250624179840088 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.3770271837711334, "epoch": 9.61, "learning_rate": 2.1696252465483236e-06, "loss": 0.346, "step": 11368, "task_loss": 0.6807384490966797 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.2927069067955017, "epoch": 9.61, "learning_rate": 2.1649290880060113e-06, "loss": 0.4418, "step": 11369, "task_loss": 0.10040730983018875 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4206555187702179, "epoch": 9.61, "learning_rate": 2.160232929463699e-06, "loss": 0.5424, "step": 11370, "task_loss": 0.02807283028960228 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5097132921218872, "epoch": 9.61, "learning_rate": 2.1555367709213865e-06, "loss": 0.4996, "step": 11371, "task_loss": 0.14528372883796692 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.212254285812378, "epoch": 9.61, "learning_rate": 2.150840612379074e-06, "loss": 0.77, "step": 11372, "task_loss": 1.0149073600769043 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.3413701057434082, "epoch": 9.61, "learning_rate": 2.1461444538367617e-06, "loss": 0.4637, "step": 11373, "task_loss": 0.5785795450210571 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.2077215015888214, "epoch": 9.61, "learning_rate": 2.1414482952944493e-06, "loss": 0.5194, "step": 11374, "task_loss": 0.4637937545776367 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5268356800079346, "epoch": 9.61, "learning_rate": 2.136752136752137e-06, "loss": 0.5422, "step": 11375, "task_loss": 0.41784340143203735 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4584645926952362, "epoch": 9.62, "learning_rate": 2.1320559782098245e-06, "loss": 0.5946, "step": 11376, "task_loss": 0.7697169780731201 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.42890146374702454, "epoch": 9.62, "learning_rate": 2.127359819667512e-06, "loss": 0.4079, "step": 11377, "task_loss": 0.5323693156242371 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.34654513001441956, "epoch": 9.62, "learning_rate": 2.1226636611251998e-06, "loss": 0.4402, "step": 11378, "task_loss": 0.5559029579162598 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7698297500610352, "epoch": 9.62, "learning_rate": 2.1179675025828874e-06, "loss": 0.5272, "step": 11379, "task_loss": 0.4025132954120636 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.8065383434295654, "epoch": 9.62, "learning_rate": 2.113271344040575e-06, "loss": 0.6317, "step": 11380, "task_loss": 0.722902774810791 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6418182849884033, "epoch": 9.62, "learning_rate": 2.1085751854982626e-06, "loss": 0.5592, "step": 11381, "task_loss": 0.7413223385810852 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6465228796005249, "epoch": 9.62, "learning_rate": 2.10387902695595e-06, "loss": 0.5976, "step": 11382, "task_loss": 0.6990059018135071 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5830527544021606, "epoch": 9.62, "learning_rate": 2.099182868413638e-06, "loss": 0.6527, "step": 11383, "task_loss": 1.2063448429107666 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.35674935579299927, "epoch": 9.62, "learning_rate": 2.0944867098713254e-06, "loss": 0.546, "step": 11384, "task_loss": 0.754609227180481 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6285964250564575, "epoch": 9.62, "learning_rate": 2.0897905513290126e-06, "loss": 0.5898, "step": 11385, "task_loss": 0.421176016330719 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.8485491275787354, "epoch": 9.62, "learning_rate": 2.0850943927867006e-06, "loss": 0.5845, "step": 11386, "task_loss": 0.7623026371002197 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.36015641689300537, "epoch": 9.63, "learning_rate": 2.0803982342443882e-06, "loss": 0.5057, "step": 11387, "task_loss": 0.5767558217048645 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5697177648544312, "epoch": 9.63, "learning_rate": 2.075702075702076e-06, "loss": 0.6388, "step": 11388, "task_loss": 1.2033848762512207 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5609574317932129, "epoch": 9.63, "learning_rate": 2.0710059171597635e-06, "loss": 0.5668, "step": 11389, "task_loss": 1.1689881086349487 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.3769562542438507, "epoch": 9.63, "learning_rate": 2.066309758617451e-06, "loss": 0.3853, "step": 11390, "task_loss": 0.627342700958252 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.9064114093780518, "epoch": 9.63, "learning_rate": 2.0616136000751387e-06, "loss": 0.8, "step": 11391, "task_loss": 1.698434829711914 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4534938335418701, "epoch": 9.63, "learning_rate": 2.0569174415328263e-06, "loss": 0.5359, "step": 11392, "task_loss": 0.3029283881187439 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.27526289224624634, "epoch": 9.63, "learning_rate": 2.052221282990514e-06, "loss": 0.5997, "step": 11393, "task_loss": 0.6870337724685669 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6988538503646851, "epoch": 9.63, "learning_rate": 2.0475251244482015e-06, "loss": 0.4933, "step": 11394, "task_loss": 1.2264176607131958 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5033267736434937, "epoch": 9.63, "learning_rate": 2.042828965905889e-06, "loss": 0.5801, "step": 11395, "task_loss": 0.28577178716659546 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.2712342441082001, "epoch": 9.63, "learning_rate": 2.0381328073635767e-06, "loss": 0.4701, "step": 11396, "task_loss": 0.041305821388959885 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6948150992393494, "epoch": 9.63, "learning_rate": 2.0334366488212643e-06, "loss": 0.5206, "step": 11397, "task_loss": 0.2705252170562744 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5131797790527344, "epoch": 9.63, "learning_rate": 2.028740490278952e-06, "loss": 0.5664, "step": 11398, "task_loss": 0.9425686597824097 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7910622358322144, "epoch": 9.64, "learning_rate": 2.0240443317366396e-06, "loss": 0.623, "step": 11399, "task_loss": 0.373815655708313 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.2773650884628296, "epoch": 9.64, "learning_rate": 2.019348173194327e-06, "loss": 0.4221, "step": 11400, "task_loss": 0.4856134355068207 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.34700414538383484, "epoch": 9.64, "learning_rate": 2.0146520146520148e-06, "loss": 0.535, "step": 11401, "task_loss": 0.5109906196594238 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.36448150873184204, "epoch": 9.64, "learning_rate": 2.0099558561097024e-06, "loss": 0.4651, "step": 11402, "task_loss": 0.30071714520454407 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.48587363958358765, "epoch": 9.64, "learning_rate": 2.00525969756739e-06, "loss": 0.4584, "step": 11403, "task_loss": 0.7365081310272217 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.317171573638916, "epoch": 9.64, "learning_rate": 2.0005635390250776e-06, "loss": 0.5336, "step": 11404, "task_loss": 0.5099436044692993 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4770129323005676, "epoch": 9.64, "learning_rate": 1.9958673804827652e-06, "loss": 0.3974, "step": 11405, "task_loss": 0.9173585176467896 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.387908935546875, "epoch": 9.64, "learning_rate": 1.991171221940453e-06, "loss": 0.5447, "step": 11406, "task_loss": 0.6485097408294678 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.2975608706474304, "epoch": 9.64, "learning_rate": 1.9864750633981404e-06, "loss": 0.6818, "step": 11407, "task_loss": 0.5057646632194519 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.8049953579902649, "epoch": 9.64, "learning_rate": 1.981778904855828e-06, "loss": 0.6527, "step": 11408, "task_loss": 0.6023550033569336 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4158722758293152, "epoch": 9.64, "learning_rate": 1.9770827463135157e-06, "loss": 0.4384, "step": 11409, "task_loss": 0.8810976147651672 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6338034272193909, "epoch": 9.64, "learning_rate": 1.9723865877712033e-06, "loss": 0.5533, "step": 11410, "task_loss": 0.422211229801178 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.20183518528938293, "epoch": 9.65, "learning_rate": 1.967690429228891e-06, "loss": 0.5289, "step": 11411, "task_loss": 0.18101917207241058 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.3412478566169739, "epoch": 9.65, "learning_rate": 1.9629942706865785e-06, "loss": 0.3614, "step": 11412, "task_loss": 1.0988258123397827 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6582709550857544, "epoch": 9.65, "learning_rate": 1.958298112144266e-06, "loss": 0.532, "step": 11413, "task_loss": 0.9655368328094482 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.3807488679885864, "epoch": 9.65, "learning_rate": 1.9536019536019537e-06, "loss": 0.351, "step": 11414, "task_loss": 0.0759316012263298 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.36646178364753723, "epoch": 9.65, "learning_rate": 1.9489057950596413e-06, "loss": 0.5103, "step": 11415, "task_loss": 0.6569719910621643 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.46015650033950806, "epoch": 9.65, "learning_rate": 1.944209636517329e-06, "loss": 0.4993, "step": 11416, "task_loss": 0.3372751772403717 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.46350613236427307, "epoch": 9.65, "learning_rate": 1.9395134779750165e-06, "loss": 0.4979, "step": 11417, "task_loss": 0.3964754343032837 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7152127623558044, "epoch": 9.65, "learning_rate": 1.934817319432704e-06, "loss": 0.4895, "step": 11418, "task_loss": 0.2720735967159271 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.432297021150589, "epoch": 9.65, "learning_rate": 1.9301211608903918e-06, "loss": 0.5948, "step": 11419, "task_loss": 0.48711010813713074 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7634737491607666, "epoch": 9.65, "learning_rate": 1.9254250023480794e-06, "loss": 0.7019, "step": 11420, "task_loss": 0.8792864680290222 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6778569221496582, "epoch": 9.65, "learning_rate": 1.920728843805767e-06, "loss": 0.4817, "step": 11421, "task_loss": 0.6431885957717896 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.3309305012226105, "epoch": 9.65, "learning_rate": 1.9160326852634546e-06, "loss": 0.4711, "step": 11422, "task_loss": 0.019695384427905083 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.3831704258918762, "epoch": 9.66, "learning_rate": 1.911336526721142e-06, "loss": 0.5397, "step": 11423, "task_loss": 0.31559550762176514 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4908909499645233, "epoch": 9.66, "learning_rate": 1.90664036817883e-06, "loss": 0.5216, "step": 11424, "task_loss": 0.5803307890892029 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6039223074913025, "epoch": 9.66, "learning_rate": 1.9019442096365174e-06, "loss": 0.5381, "step": 11425, "task_loss": 0.9097698330879211 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.3814285099506378, "epoch": 9.66, "learning_rate": 1.897248051094205e-06, "loss": 0.5373, "step": 11426, "task_loss": 0.3421352505683899 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.3630097508430481, "epoch": 9.66, "learning_rate": 1.8925518925518924e-06, "loss": 0.4927, "step": 11427, "task_loss": 0.5205262303352356 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.3492605686187744, "epoch": 9.66, "learning_rate": 1.8878557340095805e-06, "loss": 0.6895, "step": 11428, "task_loss": 0.4407202899456024 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.39480993151664734, "epoch": 9.66, "learning_rate": 1.8831595754672679e-06, "loss": 0.5088, "step": 11429, "task_loss": 0.9788179993629456 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.31586894392967224, "epoch": 9.66, "learning_rate": 1.8784634169249555e-06, "loss": 0.4813, "step": 11430, "task_loss": 0.6838805079460144 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.8324129581451416, "epoch": 9.66, "learning_rate": 1.8737672583826429e-06, "loss": 0.5918, "step": 11431, "task_loss": 0.7188927531242371 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4706302285194397, "epoch": 9.66, "learning_rate": 1.869071099840331e-06, "loss": 0.4293, "step": 11432, "task_loss": 0.5183086395263672 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5760665535926819, "epoch": 9.66, "learning_rate": 1.8643749412980183e-06, "loss": 0.5366, "step": 11433, "task_loss": 1.4357893466949463 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6407067179679871, "epoch": 9.66, "learning_rate": 1.859678782755706e-06, "loss": 0.5939, "step": 11434, "task_loss": 0.7987834215164185 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.37229210138320923, "epoch": 9.67, "learning_rate": 1.8549826242133933e-06, "loss": 0.5008, "step": 11435, "task_loss": 0.283566415309906 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.3849557340145111, "epoch": 9.67, "learning_rate": 1.8502864656710811e-06, "loss": 0.5514, "step": 11436, "task_loss": 0.3423067033290863 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6533241271972656, "epoch": 9.67, "learning_rate": 1.8455903071287688e-06, "loss": 0.488, "step": 11437, "task_loss": 1.2742427587509155 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.25612151622772217, "epoch": 9.67, "learning_rate": 1.8408941485864564e-06, "loss": 0.3962, "step": 11438, "task_loss": 0.12319464981555939 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7691036462783813, "epoch": 9.67, "learning_rate": 1.8361979900441438e-06, "loss": 0.6162, "step": 11439, "task_loss": 1.5722628831863403 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6252167224884033, "epoch": 9.67, "learning_rate": 1.8315018315018316e-06, "loss": 0.54, "step": 11440, "task_loss": 0.35404253005981445 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5172237753868103, "epoch": 9.67, "learning_rate": 1.8268056729595192e-06, "loss": 0.607, "step": 11441, "task_loss": 0.9308105707168579 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.27958208322525024, "epoch": 9.67, "learning_rate": 1.8221095144172068e-06, "loss": 0.5482, "step": 11442, "task_loss": 0.5811054110527039 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5526999831199646, "epoch": 9.67, "learning_rate": 1.8174133558748946e-06, "loss": 0.5631, "step": 11443, "task_loss": 0.7441183924674988 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5582119226455688, "epoch": 9.67, "learning_rate": 1.812717197332582e-06, "loss": 0.6481, "step": 11444, "task_loss": 1.1997815370559692 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7663733959197998, "epoch": 9.67, "learning_rate": 1.8080210387902696e-06, "loss": 0.5549, "step": 11445, "task_loss": 1.0760046243667603 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.48026636242866516, "epoch": 9.67, "learning_rate": 1.8033248802479572e-06, "loss": 0.5647, "step": 11446, "task_loss": 0.5653250813484192 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5247679352760315, "epoch": 9.68, "learning_rate": 1.798628721705645e-06, "loss": 0.5945, "step": 11447, "task_loss": 1.5842952728271484 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5619928240776062, "epoch": 9.68, "learning_rate": 1.7939325631633325e-06, "loss": 0.5334, "step": 11448, "task_loss": 0.11431525647640228 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.38217905163764954, "epoch": 9.68, "learning_rate": 1.78923640462102e-06, "loss": 0.4343, "step": 11449, "task_loss": 0.17117930948734283 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.2910740375518799, "epoch": 9.68, "learning_rate": 1.7845402460787075e-06, "loss": 0.6241, "step": 11450, "task_loss": 1.0246496200561523 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.3700796663761139, "epoch": 9.68, "learning_rate": 1.7798440875363955e-06, "loss": 0.423, "step": 11451, "task_loss": 0.9428666830062866 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.28177887201309204, "epoch": 9.68, "learning_rate": 1.775147928994083e-06, "loss": 0.5776, "step": 11452, "task_loss": 0.48674842715263367 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.40159133076667786, "epoch": 9.68, "learning_rate": 1.7704517704517705e-06, "loss": 0.5967, "step": 11453, "task_loss": 1.018568754196167 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5420736074447632, "epoch": 9.68, "learning_rate": 1.765755611909458e-06, "loss": 0.4989, "step": 11454, "task_loss": 0.30141696333885193 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.8138407468795776, "epoch": 9.68, "learning_rate": 1.761059453367146e-06, "loss": 0.5488, "step": 11455, "task_loss": 0.5972869992256165 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6012736558914185, "epoch": 9.68, "learning_rate": 1.7563632948248333e-06, "loss": 0.7249, "step": 11456, "task_loss": 0.6163088083267212 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6256277561187744, "epoch": 9.68, "learning_rate": 1.751667136282521e-06, "loss": 0.5429, "step": 11457, "task_loss": 0.18946219980716705 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.2902298867702484, "epoch": 9.69, "learning_rate": 1.7469709777402084e-06, "loss": 0.4499, "step": 11458, "task_loss": 0.518046498298645 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6199654340744019, "epoch": 9.69, "learning_rate": 1.7422748191978964e-06, "loss": 0.4597, "step": 11459, "task_loss": 0.2770633101463318 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4486556649208069, "epoch": 9.69, "learning_rate": 1.7375786606555838e-06, "loss": 0.5637, "step": 11460, "task_loss": 1.0077593326568604 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.26702263951301575, "epoch": 9.69, "learning_rate": 1.7328825021132714e-06, "loss": 0.4551, "step": 11461, "task_loss": 1.410670280456543 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.356871098279953, "epoch": 9.69, "learning_rate": 1.7281863435709588e-06, "loss": 0.5788, "step": 11462, "task_loss": 0.6124656200408936 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7186338901519775, "epoch": 9.69, "learning_rate": 1.7234901850286468e-06, "loss": 0.6833, "step": 11463, "task_loss": 0.5351365208625793 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5803250670433044, "epoch": 9.69, "learning_rate": 1.7187940264863342e-06, "loss": 0.6266, "step": 11464, "task_loss": 0.41014429926872253 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6916131973266602, "epoch": 9.69, "learning_rate": 1.7140978679440218e-06, "loss": 0.6371, "step": 11465, "task_loss": 0.7057671546936035 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4370342791080475, "epoch": 9.69, "learning_rate": 1.7094017094017097e-06, "loss": 0.5309, "step": 11466, "task_loss": 0.4182102382183075 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5826621651649475, "epoch": 9.69, "learning_rate": 1.7047055508593973e-06, "loss": 0.588, "step": 11467, "task_loss": 0.2382158488035202 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.8353487253189087, "epoch": 9.69, "learning_rate": 1.7000093923170847e-06, "loss": 0.5951, "step": 11468, "task_loss": 0.9908468127250671 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6293667554855347, "epoch": 9.69, "learning_rate": 1.6953132337747723e-06, "loss": 0.6013, "step": 11469, "task_loss": 0.7203791737556458 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.1999642848968506, "epoch": 9.7, "learning_rate": 1.69061707523246e-06, "loss": 0.6438, "step": 11470, "task_loss": 0.9174990653991699 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.47182023525238037, "epoch": 9.7, "learning_rate": 1.6859209166901477e-06, "loss": 0.5863, "step": 11471, "task_loss": 0.2513229548931122 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4197593927383423, "epoch": 9.7, "learning_rate": 1.6812247581478351e-06, "loss": 0.4886, "step": 11472, "task_loss": 0.3842479884624481 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.35015416145324707, "epoch": 9.7, "learning_rate": 1.6765285996055227e-06, "loss": 0.5745, "step": 11473, "task_loss": 0.5927641987800598 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5260219573974609, "epoch": 9.7, "learning_rate": 1.6718324410632105e-06, "loss": 0.4678, "step": 11474, "task_loss": 0.2036322057247162 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7272312641143799, "epoch": 9.7, "learning_rate": 1.667136282520898e-06, "loss": 0.6014, "step": 11475, "task_loss": 0.299737811088562 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.46678540110588074, "epoch": 9.7, "learning_rate": 1.6624401239785856e-06, "loss": 0.4281, "step": 11476, "task_loss": 0.5127730965614319 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.33670303225517273, "epoch": 9.7, "learning_rate": 1.6577439654362732e-06, "loss": 0.5122, "step": 11477, "task_loss": 0.3591921627521515 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4550689458847046, "epoch": 9.7, "learning_rate": 1.653047806893961e-06, "loss": 0.4194, "step": 11478, "task_loss": 0.33631694316864014 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7339975833892822, "epoch": 9.7, "learning_rate": 1.6483516483516484e-06, "loss": 0.5662, "step": 11479, "task_loss": 1.0520552396774292 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6990529298782349, "epoch": 9.7, "learning_rate": 1.643655489809336e-06, "loss": 0.6022, "step": 11480, "task_loss": 1.287522792816162 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.36819988489151, "epoch": 9.7, "learning_rate": 1.6389593312670236e-06, "loss": 0.4681, "step": 11481, "task_loss": 0.6823718547821045 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7517868280410767, "epoch": 9.71, "learning_rate": 1.6342631727247114e-06, "loss": 0.6539, "step": 11482, "task_loss": 0.8537051677703857 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.49158775806427, "epoch": 9.71, "learning_rate": 1.6295670141823988e-06, "loss": 0.6412, "step": 11483, "task_loss": 1.2917907238006592 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7884390354156494, "epoch": 9.71, "learning_rate": 1.6248708556400864e-06, "loss": 0.7169, "step": 11484, "task_loss": 1.4464924335479736 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6628308892250061, "epoch": 9.71, "learning_rate": 1.620174697097774e-06, "loss": 0.5296, "step": 11485, "task_loss": 0.9179601669311523 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.37098443508148193, "epoch": 9.71, "learning_rate": 1.6154785385554619e-06, "loss": 0.4818, "step": 11486, "task_loss": 2.040372133255005 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4042167067527771, "epoch": 9.71, "learning_rate": 1.6107823800131493e-06, "loss": 0.6047, "step": 11487, "task_loss": 0.5229744911193848 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.8282198905944824, "epoch": 9.71, "learning_rate": 1.6060862214708369e-06, "loss": 0.6515, "step": 11488, "task_loss": 1.1605839729309082 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5591453909873962, "epoch": 9.71, "learning_rate": 1.6013900629285243e-06, "loss": 0.523, "step": 11489, "task_loss": 0.4052616059780121 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5347902178764343, "epoch": 9.71, "learning_rate": 1.5966939043862123e-06, "loss": 0.6062, "step": 11490, "task_loss": 0.501980185508728 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5177268981933594, "epoch": 9.71, "learning_rate": 1.5919977458438997e-06, "loss": 0.5345, "step": 11491, "task_loss": 0.1114128977060318 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.44069892168045044, "epoch": 9.71, "learning_rate": 1.5873015873015873e-06, "loss": 0.5572, "step": 11492, "task_loss": 0.3993459641933441 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5387635231018066, "epoch": 9.71, "learning_rate": 1.5826054287592751e-06, "loss": 0.6122, "step": 11493, "task_loss": 0.5208805203437805 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6882684230804443, "epoch": 9.72, "learning_rate": 1.5779092702169627e-06, "loss": 0.6071, "step": 11494, "task_loss": 1.050876259803772 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.25800374150276184, "epoch": 9.72, "learning_rate": 1.5732131116746501e-06, "loss": 0.5395, "step": 11495, "task_loss": 0.8320268392562866 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6293847560882568, "epoch": 9.72, "learning_rate": 1.5685169531323378e-06, "loss": 0.6958, "step": 11496, "task_loss": 0.7482283115386963 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5255011916160583, "epoch": 9.72, "learning_rate": 1.5638207945900256e-06, "loss": 0.5407, "step": 11497, "task_loss": 0.9688664674758911 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.41892117261886597, "epoch": 9.72, "learning_rate": 1.5591246360477132e-06, "loss": 0.7519, "step": 11498, "task_loss": 0.41321319341659546 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4400743842124939, "epoch": 9.72, "learning_rate": 1.5544284775054006e-06, "loss": 0.415, "step": 11499, "task_loss": 0.21726928651332855 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.390968918800354, "epoch": 9.72, "learning_rate": 1.5497323189630884e-06, "loss": 0.4337, "step": 11500, "task_loss": 0.8967593312263489 }, { "epoch": 9.72, "eval_accuracy": 0.9057425742574258, "eval_loss": 0.3511705696582794, "eval_runtime": 227.2672, "eval_samples_per_second": 111.103, "eval_steps_per_second": 0.871, "step": 11500 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5567768812179565, "epoch": 9.72, "learning_rate": 1.5450361604207758e-06, "loss": 0.5434, "step": 11501, "task_loss": 0.2658913731575012 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.46981245279312134, "epoch": 9.72, "learning_rate": 1.5403400018784636e-06, "loss": 0.4256, "step": 11502, "task_loss": 0.14546000957489014 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.3608664572238922, "epoch": 9.72, "learning_rate": 1.535643843336151e-06, "loss": 0.4488, "step": 11503, "task_loss": 0.5932294130325317 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.638058602809906, "epoch": 9.72, "learning_rate": 1.5309476847938389e-06, "loss": 0.6472, "step": 11504, "task_loss": 0.14560745656490326 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.40062224864959717, "epoch": 9.72, "learning_rate": 1.5262515262515263e-06, "loss": 0.4603, "step": 11505, "task_loss": 0.8395342826843262 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.2785366475582123, "epoch": 9.73, "learning_rate": 1.521555367709214e-06, "loss": 0.3652, "step": 11506, "task_loss": 0.09998361021280289 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5614135265350342, "epoch": 9.73, "learning_rate": 1.5168592091669015e-06, "loss": 0.4011, "step": 11507, "task_loss": 0.3780283033847809 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6255232691764832, "epoch": 9.73, "learning_rate": 1.5121630506245893e-06, "loss": 0.56, "step": 11508, "task_loss": 1.5151031017303467 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.43301820755004883, "epoch": 9.73, "learning_rate": 1.5074668920822767e-06, "loss": 0.4538, "step": 11509, "task_loss": 0.14908526837825775 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4144566059112549, "epoch": 9.73, "learning_rate": 1.5027707335399643e-06, "loss": 0.6267, "step": 11510, "task_loss": 0.521141767501831 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5095493793487549, "epoch": 9.73, "learning_rate": 1.498074574997652e-06, "loss": 0.6482, "step": 11511, "task_loss": 0.8237716555595398 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6125508546829224, "epoch": 9.73, "learning_rate": 1.4933784164553395e-06, "loss": 0.5238, "step": 11512, "task_loss": 0.26352524757385254 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.437438040971756, "epoch": 9.73, "learning_rate": 1.4886822579130271e-06, "loss": 0.6446, "step": 11513, "task_loss": 0.255998820066452 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.40522947907447815, "epoch": 9.73, "learning_rate": 1.4839860993707147e-06, "loss": 0.5732, "step": 11514, "task_loss": 0.19258660078048706 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.41361743211746216, "epoch": 9.73, "learning_rate": 1.4792899408284024e-06, "loss": 0.4055, "step": 11515, "task_loss": 0.2814204692840576 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5382248759269714, "epoch": 9.73, "learning_rate": 1.47459378228609e-06, "loss": 0.4374, "step": 11516, "task_loss": 0.300976037979126 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.569606363773346, "epoch": 9.73, "learning_rate": 1.4698976237437776e-06, "loss": 0.5846, "step": 11517, "task_loss": 1.4356436729431152 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.26649874448776245, "epoch": 9.74, "learning_rate": 1.4652014652014652e-06, "loss": 0.4426, "step": 11518, "task_loss": 0.7886174321174622 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4209505319595337, "epoch": 9.74, "learning_rate": 1.460505306659153e-06, "loss": 0.6332, "step": 11519, "task_loss": 0.3427625596523285 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.37186598777770996, "epoch": 9.74, "learning_rate": 1.4558091481168404e-06, "loss": 0.4816, "step": 11520, "task_loss": 0.19172817468643188 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.35611990094184875, "epoch": 9.74, "learning_rate": 1.4511129895745282e-06, "loss": 0.5433, "step": 11521, "task_loss": 0.2556125819683075 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.8351103067398071, "epoch": 9.74, "learning_rate": 1.4464168310322156e-06, "loss": 0.6653, "step": 11522, "task_loss": 0.6433969736099243 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4323684871196747, "epoch": 9.74, "learning_rate": 1.4417206724899034e-06, "loss": 0.5032, "step": 11523, "task_loss": 0.5185337662696838 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4899367392063141, "epoch": 9.74, "learning_rate": 1.4370245139475908e-06, "loss": 0.5382, "step": 11524, "task_loss": 0.4096483886241913 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4018746614456177, "epoch": 9.74, "learning_rate": 1.4323283554052787e-06, "loss": 0.4922, "step": 11525, "task_loss": 0.870339035987854 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5089036226272583, "epoch": 9.74, "learning_rate": 1.427632196862966e-06, "loss": 0.5942, "step": 11526, "task_loss": 1.0038331747055054 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5607277154922485, "epoch": 9.74, "learning_rate": 1.4229360383206539e-06, "loss": 0.5483, "step": 11527, "task_loss": 0.5007983446121216 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.35664600133895874, "epoch": 9.74, "learning_rate": 1.4182398797783413e-06, "loss": 0.5427, "step": 11528, "task_loss": 1.0948646068572998 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6890609860420227, "epoch": 9.75, "learning_rate": 1.4135437212360291e-06, "loss": 0.5119, "step": 11529, "task_loss": 0.6640431880950928 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6069000363349915, "epoch": 9.75, "learning_rate": 1.4088475626937165e-06, "loss": 0.4472, "step": 11530, "task_loss": 0.28757908940315247 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5472804307937622, "epoch": 9.75, "learning_rate": 1.4041514041514043e-06, "loss": 0.5132, "step": 11531, "task_loss": 0.5858628153800964 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.3309595584869385, "epoch": 9.75, "learning_rate": 1.3994552456090917e-06, "loss": 0.4889, "step": 11532, "task_loss": 0.3597802221775055 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.0271484851837158, "epoch": 9.75, "learning_rate": 1.3947590870667795e-06, "loss": 0.6455, "step": 11533, "task_loss": 1.0215789079666138 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.443034827709198, "epoch": 9.75, "learning_rate": 1.390062928524467e-06, "loss": 0.4848, "step": 11534, "task_loss": 0.2977057695388794 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7900955080986023, "epoch": 9.75, "learning_rate": 1.3853667699821548e-06, "loss": 0.481, "step": 11535, "task_loss": 1.3864778280258179 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.392417848110199, "epoch": 9.75, "learning_rate": 1.3806706114398422e-06, "loss": 0.4273, "step": 11536, "task_loss": 0.5588244199752808 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6663950681686401, "epoch": 9.75, "learning_rate": 1.37597445289753e-06, "loss": 0.5253, "step": 11537, "task_loss": 1.1801608800888062 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 1.243700623512268, "epoch": 9.75, "learning_rate": 1.3712782943552174e-06, "loss": 0.742, "step": 11538, "task_loss": 1.4689083099365234 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4488237798213959, "epoch": 9.75, "learning_rate": 1.3665821358129052e-06, "loss": 0.5058, "step": 11539, "task_loss": 0.1712009608745575 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.45357435941696167, "epoch": 9.75, "learning_rate": 1.3618859772705926e-06, "loss": 0.569, "step": 11540, "task_loss": 0.6335695385932922 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4018586277961731, "epoch": 9.76, "learning_rate": 1.3571898187282804e-06, "loss": 0.5226, "step": 11541, "task_loss": 0.15843738615512848 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5824781060218811, "epoch": 9.76, "learning_rate": 1.3524936601859678e-06, "loss": 0.7349, "step": 11542, "task_loss": 1.4136425256729126 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5276750922203064, "epoch": 9.76, "learning_rate": 1.3477975016436557e-06, "loss": 0.4919, "step": 11543, "task_loss": 0.6889218688011169 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5535430312156677, "epoch": 9.76, "learning_rate": 1.3431013431013433e-06, "loss": 0.4041, "step": 11544, "task_loss": 1.014448881149292 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.483516126871109, "epoch": 9.76, "learning_rate": 1.3384051845590309e-06, "loss": 0.5345, "step": 11545, "task_loss": 0.6694908142089844 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5338544249534607, "epoch": 9.76, "learning_rate": 1.3337090260167185e-06, "loss": 0.5485, "step": 11546, "task_loss": 1.0113564729690552 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.3086256980895996, "epoch": 9.76, "learning_rate": 1.329012867474406e-06, "loss": 0.5753, "step": 11547, "task_loss": 0.6875080466270447 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.37699294090270996, "epoch": 9.76, "learning_rate": 1.3243167089320937e-06, "loss": 0.4969, "step": 11548, "task_loss": 0.1622782051563263 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.561211347579956, "epoch": 9.76, "learning_rate": 1.319620550389781e-06, "loss": 0.5388, "step": 11549, "task_loss": 1.0040934085845947 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4216953217983246, "epoch": 9.76, "learning_rate": 1.314924391847469e-06, "loss": 0.4537, "step": 11550, "task_loss": 0.8901730179786682 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6656798720359802, "epoch": 9.76, "learning_rate": 1.3102282333051563e-06, "loss": 0.5279, "step": 11551, "task_loss": 0.6245513558387756 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5316826105117798, "epoch": 9.76, "learning_rate": 1.3055320747628441e-06, "loss": 0.6651, "step": 11552, "task_loss": 1.250679850578308 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.3855898082256317, "epoch": 9.77, "learning_rate": 1.3008359162205315e-06, "loss": 0.5085, "step": 11553, "task_loss": 1.1959214210510254 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.3168773055076599, "epoch": 9.77, "learning_rate": 1.2961397576782194e-06, "loss": 0.3888, "step": 11554, "task_loss": 0.569686233997345 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.651950478553772, "epoch": 9.77, "learning_rate": 1.2914435991359068e-06, "loss": 0.5338, "step": 11555, "task_loss": 0.7176133990287781 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5993804931640625, "epoch": 9.77, "learning_rate": 1.2867474405935946e-06, "loss": 0.4859, "step": 11556, "task_loss": 0.31746017932891846 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.31995171308517456, "epoch": 9.77, "learning_rate": 1.282051282051282e-06, "loss": 0.477, "step": 11557, "task_loss": 0.662502110004425 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.8163994550704956, "epoch": 9.77, "learning_rate": 1.2773551235089698e-06, "loss": 0.6399, "step": 11558, "task_loss": 0.5642949938774109 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6283993721008301, "epoch": 9.77, "learning_rate": 1.2726589649666572e-06, "loss": 0.6732, "step": 11559, "task_loss": 1.2210075855255127 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4720362424850464, "epoch": 9.77, "learning_rate": 1.267962806424345e-06, "loss": 0.5127, "step": 11560, "task_loss": 0.5592157244682312 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6326269507408142, "epoch": 9.77, "learning_rate": 1.2632666478820324e-06, "loss": 0.4377, "step": 11561, "task_loss": 0.5415890216827393 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7122247219085693, "epoch": 9.77, "learning_rate": 1.2585704893397202e-06, "loss": 0.825, "step": 11562, "task_loss": 0.4452231824398041 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.38139837980270386, "epoch": 9.77, "learning_rate": 1.2538743307974076e-06, "loss": 0.5184, "step": 11563, "task_loss": 1.0324598550796509 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5226012468338013, "epoch": 9.77, "learning_rate": 1.2491781722550955e-06, "loss": 0.5152, "step": 11564, "task_loss": 0.3369823694229126 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6828691959381104, "epoch": 9.78, "learning_rate": 1.2444820137127829e-06, "loss": 0.6023, "step": 11565, "task_loss": 0.8556783199310303 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.45525631308555603, "epoch": 9.78, "learning_rate": 1.2397858551704707e-06, "loss": 0.3918, "step": 11566, "task_loss": 0.5870350003242493 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4529707133769989, "epoch": 9.78, "learning_rate": 1.235089696628158e-06, "loss": 0.5443, "step": 11567, "task_loss": 0.3626823127269745 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.39108002185821533, "epoch": 9.78, "learning_rate": 1.230393538085846e-06, "loss": 0.5113, "step": 11568, "task_loss": 1.2324045896530151 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6650864481925964, "epoch": 9.78, "learning_rate": 1.2256973795435333e-06, "loss": 0.564, "step": 11569, "task_loss": 1.095869541168213 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6135939955711365, "epoch": 9.78, "learning_rate": 1.2210012210012211e-06, "loss": 0.633, "step": 11570, "task_loss": 1.5814604759216309 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6984860301017761, "epoch": 9.78, "learning_rate": 1.2163050624589087e-06, "loss": 0.4799, "step": 11571, "task_loss": 0.4044111669063568 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6684539318084717, "epoch": 9.78, "learning_rate": 1.2116089039165963e-06, "loss": 0.5773, "step": 11572, "task_loss": 0.9324235320091248 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.35391858220100403, "epoch": 9.78, "learning_rate": 1.206912745374284e-06, "loss": 0.3707, "step": 11573, "task_loss": 0.3308321237564087 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4062829613685608, "epoch": 9.78, "learning_rate": 1.2022165868319716e-06, "loss": 0.3745, "step": 11574, "task_loss": 0.7701423764228821 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6568615436553955, "epoch": 9.78, "learning_rate": 1.1975204282896592e-06, "loss": 0.547, "step": 11575, "task_loss": 0.6077378392219543 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6105924844741821, "epoch": 9.78, "learning_rate": 1.1928242697473468e-06, "loss": 0.5293, "step": 11576, "task_loss": 1.178383231163025 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.3055509328842163, "epoch": 9.79, "learning_rate": 1.1881281112050344e-06, "loss": 0.4596, "step": 11577, "task_loss": 0.5228952169418335 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5921518802642822, "epoch": 9.79, "learning_rate": 1.183431952662722e-06, "loss": 0.5094, "step": 11578, "task_loss": 0.2918110191822052 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.40331828594207764, "epoch": 9.79, "learning_rate": 1.1787357941204096e-06, "loss": 0.4365, "step": 11579, "task_loss": 0.49102702736854553 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.717434287071228, "epoch": 9.79, "learning_rate": 1.1740396355780972e-06, "loss": 0.5516, "step": 11580, "task_loss": 0.9940044283866882 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.3069460988044739, "epoch": 9.79, "learning_rate": 1.1693434770357848e-06, "loss": 0.4929, "step": 11581, "task_loss": 0.3099232017993927 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.8899157047271729, "epoch": 9.79, "learning_rate": 1.1646473184934725e-06, "loss": 0.5714, "step": 11582, "task_loss": 1.061488151550293 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.33348381519317627, "epoch": 9.79, "learning_rate": 1.15995115995116e-06, "loss": 0.6079, "step": 11583, "task_loss": 0.09759016335010529 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.423615038394928, "epoch": 9.79, "learning_rate": 1.1552550014088477e-06, "loss": 0.4984, "step": 11584, "task_loss": 0.8607199788093567 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.515560507774353, "epoch": 9.79, "learning_rate": 1.1505588428665353e-06, "loss": 0.5499, "step": 11585, "task_loss": 0.5212283134460449 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5573514699935913, "epoch": 9.79, "learning_rate": 1.1458626843242229e-06, "loss": 0.5308, "step": 11586, "task_loss": 0.6055467128753662 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.46745622158050537, "epoch": 9.79, "learning_rate": 1.1411665257819105e-06, "loss": 0.5789, "step": 11587, "task_loss": 1.0636824369430542 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.47048380970954895, "epoch": 9.79, "learning_rate": 1.136470367239598e-06, "loss": 0.5039, "step": 11588, "task_loss": 0.31840845942497253 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6769927740097046, "epoch": 9.8, "learning_rate": 1.1317742086972857e-06, "loss": 0.5405, "step": 11589, "task_loss": 0.6843545436859131 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.9910627603530884, "epoch": 9.8, "learning_rate": 1.1270780501549731e-06, "loss": 0.6836, "step": 11590, "task_loss": 0.8024782538414001 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5615391135215759, "epoch": 9.8, "learning_rate": 1.122381891612661e-06, "loss": 0.6264, "step": 11591, "task_loss": 0.3703474998474121 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.33381354808807373, "epoch": 9.8, "learning_rate": 1.1176857330703483e-06, "loss": 0.516, "step": 11592, "task_loss": 0.6174180507659912 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.27954569458961487, "epoch": 9.8, "learning_rate": 1.1129895745280362e-06, "loss": 0.4798, "step": 11593, "task_loss": 0.4783615171909332 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6030651330947876, "epoch": 9.8, "learning_rate": 1.1082934159857236e-06, "loss": 0.6021, "step": 11594, "task_loss": 1.0057975053787231 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.41649362444877625, "epoch": 9.8, "learning_rate": 1.1035972574434114e-06, "loss": 0.4583, "step": 11595, "task_loss": 0.3739801049232483 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.27364665269851685, "epoch": 9.8, "learning_rate": 1.098901098901099e-06, "loss": 0.5547, "step": 11596, "task_loss": 1.8313324451446533 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.3823930621147156, "epoch": 9.8, "learning_rate": 1.0942049403587866e-06, "loss": 0.4341, "step": 11597, "task_loss": 0.6088560223579407 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.49057888984680176, "epoch": 9.8, "learning_rate": 1.0895087818164742e-06, "loss": 0.6453, "step": 11598, "task_loss": 0.9247868657112122 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4640599489212036, "epoch": 9.8, "learning_rate": 1.0848126232741618e-06, "loss": 0.5071, "step": 11599, "task_loss": 0.5801272988319397 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.33312439918518066, "epoch": 9.81, "learning_rate": 1.0801164647318494e-06, "loss": 0.501, "step": 11600, "task_loss": 0.12407442927360535 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4253785014152527, "epoch": 9.81, "learning_rate": 1.075420306189537e-06, "loss": 0.5372, "step": 11601, "task_loss": 0.5787076950073242 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.668445348739624, "epoch": 9.81, "learning_rate": 1.0707241476472247e-06, "loss": 0.5008, "step": 11602, "task_loss": 0.1661590188741684 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.9472905397415161, "epoch": 9.81, "learning_rate": 1.0660279891049123e-06, "loss": 0.5975, "step": 11603, "task_loss": 0.24609535932540894 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.3680833578109741, "epoch": 9.81, "learning_rate": 1.0613318305625999e-06, "loss": 0.4816, "step": 11604, "task_loss": 0.7102903127670288 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.29346439242362976, "epoch": 9.81, "learning_rate": 1.0566356720202875e-06, "loss": 0.503, "step": 11605, "task_loss": 0.2607310116291046 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4590504765510559, "epoch": 9.81, "learning_rate": 1.051939513477975e-06, "loss": 0.5866, "step": 11606, "task_loss": 0.5955806374549866 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.25720179080963135, "epoch": 9.81, "learning_rate": 1.0472433549356627e-06, "loss": 0.525, "step": 11607, "task_loss": 0.5590839385986328 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6122144460678101, "epoch": 9.81, "learning_rate": 1.0425471963933503e-06, "loss": 0.6745, "step": 11608, "task_loss": 0.4045354425907135 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.25508996844291687, "epoch": 9.81, "learning_rate": 1.037851037851038e-06, "loss": 0.5522, "step": 11609, "task_loss": 0.32204800844192505 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6577528119087219, "epoch": 9.81, "learning_rate": 1.0331548793087255e-06, "loss": 0.5495, "step": 11610, "task_loss": 0.7620530724525452 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.3551430106163025, "epoch": 9.81, "learning_rate": 1.0284587207664131e-06, "loss": 0.3791, "step": 11611, "task_loss": 0.42685604095458984 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.25118494033813477, "epoch": 9.82, "learning_rate": 1.0237625622241008e-06, "loss": 0.5701, "step": 11612, "task_loss": 0.15834520757198334 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.49000081419944763, "epoch": 9.82, "learning_rate": 1.0190664036817884e-06, "loss": 0.46, "step": 11613, "task_loss": 0.3670029938220978 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5317562818527222, "epoch": 9.82, "learning_rate": 1.014370245139476e-06, "loss": 0.5727, "step": 11614, "task_loss": 1.1310384273529053 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5481013059616089, "epoch": 9.82, "learning_rate": 1.0096740865971636e-06, "loss": 0.5879, "step": 11615, "task_loss": 0.6628435254096985 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.38540834188461304, "epoch": 9.82, "learning_rate": 1.0049779280548512e-06, "loss": 0.5063, "step": 11616, "task_loss": 0.7893949747085571 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.2162359356880188, "epoch": 9.82, "learning_rate": 1.0002817695125388e-06, "loss": 0.3939, "step": 11617, "task_loss": 0.08303603529930115 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.43777918815612793, "epoch": 9.82, "learning_rate": 9.955856109702264e-07, "loss": 0.4387, "step": 11618, "task_loss": 0.6692999601364136 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5562664270401001, "epoch": 9.82, "learning_rate": 9.90889452427914e-07, "loss": 0.4284, "step": 11619, "task_loss": 0.626455545425415 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.3915444016456604, "epoch": 9.82, "learning_rate": 9.861932938856016e-07, "loss": 0.5015, "step": 11620, "task_loss": 1.145024299621582 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4873444437980652, "epoch": 9.82, "learning_rate": 9.814971353432892e-07, "loss": 0.6575, "step": 11621, "task_loss": 0.5350620746612549 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6686731576919556, "epoch": 9.82, "learning_rate": 9.768009768009769e-07, "loss": 0.6264, "step": 11622, "task_loss": 0.5422884225845337 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.33861276507377625, "epoch": 9.82, "learning_rate": 9.721048182586645e-07, "loss": 0.497, "step": 11623, "task_loss": 0.22767645120620728 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.701568603515625, "epoch": 9.83, "learning_rate": 9.67408659716352e-07, "loss": 0.4998, "step": 11624, "task_loss": 1.2405941486358643 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.2596818804740906, "epoch": 9.83, "learning_rate": 9.627125011740397e-07, "loss": 0.5146, "step": 11625, "task_loss": 0.18982334434986115 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5603885650634766, "epoch": 9.83, "learning_rate": 9.580163426317273e-07, "loss": 0.5939, "step": 11626, "task_loss": 1.038058876991272 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4759581685066223, "epoch": 9.83, "learning_rate": 9.53320184089415e-07, "loss": 0.4709, "step": 11627, "task_loss": 0.8012621998786926 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.43395036458969116, "epoch": 9.83, "learning_rate": 9.486240255471025e-07, "loss": 0.5026, "step": 11628, "task_loss": 1.1749374866485596 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.38206666707992554, "epoch": 9.83, "learning_rate": 9.439278670047902e-07, "loss": 0.4065, "step": 11629, "task_loss": 1.007368564605713 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4643164873123169, "epoch": 9.83, "learning_rate": 9.392317084624777e-07, "loss": 0.5459, "step": 11630, "task_loss": 0.6300928592681885 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5903993844985962, "epoch": 9.83, "learning_rate": 9.345355499201655e-07, "loss": 0.5369, "step": 11631, "task_loss": 0.4379362463951111 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7203224301338196, "epoch": 9.83, "learning_rate": 9.29839391377853e-07, "loss": 0.5348, "step": 11632, "task_loss": 0.5422298312187195 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5331912040710449, "epoch": 9.83, "learning_rate": 9.251432328355406e-07, "loss": 0.3797, "step": 11633, "task_loss": 0.36403870582580566 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.49985480308532715, "epoch": 9.83, "learning_rate": 9.204470742932282e-07, "loss": 0.4733, "step": 11634, "task_loss": 0.2338169813156128 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4326721429824829, "epoch": 9.83, "learning_rate": 9.157509157509158e-07, "loss": 0.4679, "step": 11635, "task_loss": 0.07940167933702469 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5828026533126831, "epoch": 9.84, "learning_rate": 9.110547572086034e-07, "loss": 0.6053, "step": 11636, "task_loss": 0.779792845249176 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.47358018159866333, "epoch": 9.84, "learning_rate": 9.06358598666291e-07, "loss": 0.3773, "step": 11637, "task_loss": 1.2171730995178223 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7970803380012512, "epoch": 9.84, "learning_rate": 9.016624401239786e-07, "loss": 0.481, "step": 11638, "task_loss": 0.8441946506500244 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.233774796128273, "epoch": 9.84, "learning_rate": 8.969662815816662e-07, "loss": 0.4071, "step": 11639, "task_loss": 0.18194571137428284 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.596062183380127, "epoch": 9.84, "learning_rate": 8.922701230393537e-07, "loss": 0.4433, "step": 11640, "task_loss": 0.6692358255386353 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5080369114875793, "epoch": 9.84, "learning_rate": 8.875739644970415e-07, "loss": 0.5405, "step": 11641, "task_loss": 0.6118054986000061 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4059675335884094, "epoch": 9.84, "learning_rate": 8.82877805954729e-07, "loss": 0.5871, "step": 11642, "task_loss": 0.22557257115840912 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.32986581325531006, "epoch": 9.84, "learning_rate": 8.781816474124167e-07, "loss": 0.4492, "step": 11643, "task_loss": 0.1751708984375 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5777475237846375, "epoch": 9.84, "learning_rate": 8.734854888701042e-07, "loss": 0.4186, "step": 11644, "task_loss": 0.741260826587677 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5399770736694336, "epoch": 9.84, "learning_rate": 8.687893303277919e-07, "loss": 0.4496, "step": 11645, "task_loss": 1.0818626880645752 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6499859690666199, "epoch": 9.84, "learning_rate": 8.640931717854794e-07, "loss": 0.7451, "step": 11646, "task_loss": 0.6631431579589844 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.9022203683853149, "epoch": 9.84, "learning_rate": 8.593970132431671e-07, "loss": 0.5803, "step": 11647, "task_loss": 0.7155879735946655 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4263777732849121, "epoch": 9.85, "learning_rate": 8.547008547008548e-07, "loss": 0.3696, "step": 11648, "task_loss": 0.24299080669879913 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.34213414788246155, "epoch": 9.85, "learning_rate": 8.500046961585423e-07, "loss": 0.5425, "step": 11649, "task_loss": 0.3378290832042694 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.3540082573890686, "epoch": 9.85, "learning_rate": 8.4530853761623e-07, "loss": 0.4214, "step": 11650, "task_loss": 1.3626933097839355 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5542054772377014, "epoch": 9.85, "learning_rate": 8.406123790739176e-07, "loss": 0.4635, "step": 11651, "task_loss": 0.9357801675796509 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.37362322211265564, "epoch": 9.85, "learning_rate": 8.359162205316053e-07, "loss": 0.3824, "step": 11652, "task_loss": 1.2620269060134888 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6783081293106079, "epoch": 9.85, "learning_rate": 8.312200619892928e-07, "loss": 0.6272, "step": 11653, "task_loss": 0.5918532609939575 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4210309386253357, "epoch": 9.85, "learning_rate": 8.265239034469805e-07, "loss": 0.5351, "step": 11654, "task_loss": 1.0818651914596558 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5427195429801941, "epoch": 9.85, "learning_rate": 8.21827744904668e-07, "loss": 0.4965, "step": 11655, "task_loss": 0.5718876123428345 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7072763442993164, "epoch": 9.85, "learning_rate": 8.171315863623557e-07, "loss": 0.5062, "step": 11656, "task_loss": 1.2994518280029297 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.603376030921936, "epoch": 9.85, "learning_rate": 8.124354278200432e-07, "loss": 0.6182, "step": 11657, "task_loss": 0.3070209324359894 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6007376909255981, "epoch": 9.85, "learning_rate": 8.077392692777309e-07, "loss": 0.5859, "step": 11658, "task_loss": 0.7234011292457581 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4592774510383606, "epoch": 9.85, "learning_rate": 8.030431107354184e-07, "loss": 0.5964, "step": 11659, "task_loss": 0.8880534768104553 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.716975748538971, "epoch": 9.86, "learning_rate": 7.983469521931062e-07, "loss": 0.6425, "step": 11660, "task_loss": 0.7926874160766602 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.8078975677490234, "epoch": 9.86, "learning_rate": 7.936507936507937e-07, "loss": 0.5674, "step": 11661, "task_loss": 1.9866645336151123 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.702674150466919, "epoch": 9.86, "learning_rate": 7.889546351084814e-07, "loss": 0.4995, "step": 11662, "task_loss": 0.5355736613273621 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6361211538314819, "epoch": 9.86, "learning_rate": 7.842584765661689e-07, "loss": 0.5624, "step": 11663, "task_loss": 1.071346402168274 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5005534887313843, "epoch": 9.86, "learning_rate": 7.795623180238566e-07, "loss": 0.5035, "step": 11664, "task_loss": 0.9089404940605164 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.39026057720184326, "epoch": 9.86, "learning_rate": 7.748661594815442e-07, "loss": 0.5517, "step": 11665, "task_loss": 0.2000344693660736 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.40136778354644775, "epoch": 9.86, "learning_rate": 7.701700009392318e-07, "loss": 0.3921, "step": 11666, "task_loss": 1.0477348566055298 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.9167822599411011, "epoch": 9.86, "learning_rate": 7.654738423969194e-07, "loss": 0.6566, "step": 11667, "task_loss": 1.1802221536636353 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.40343695878982544, "epoch": 9.86, "learning_rate": 7.60777683854607e-07, "loss": 0.5839, "step": 11668, "task_loss": 0.8019670844078064 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5909879207611084, "epoch": 9.86, "learning_rate": 7.560815253122946e-07, "loss": 0.4888, "step": 11669, "task_loss": 0.8121470212936401 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5047805309295654, "epoch": 9.86, "learning_rate": 7.513853667699822e-07, "loss": 0.5763, "step": 11670, "task_loss": 0.42404186725616455 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6522117853164673, "epoch": 9.87, "learning_rate": 7.466892082276698e-07, "loss": 0.5742, "step": 11671, "task_loss": 0.19312548637390137 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6542204022407532, "epoch": 9.87, "learning_rate": 7.419930496853574e-07, "loss": 0.5004, "step": 11672, "task_loss": 0.8001068234443665 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5042328834533691, "epoch": 9.87, "learning_rate": 7.37296891143045e-07, "loss": 0.5101, "step": 11673, "task_loss": 1.1608223915100098 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.713342010974884, "epoch": 9.87, "learning_rate": 7.326007326007326e-07, "loss": 0.6223, "step": 11674, "task_loss": 1.5277520418167114 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7675071358680725, "epoch": 9.87, "learning_rate": 7.279045740584202e-07, "loss": 0.6392, "step": 11675, "task_loss": 1.5556610822677612 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7275183200836182, "epoch": 9.87, "learning_rate": 7.232084155161078e-07, "loss": 0.7833, "step": 11676, "task_loss": 1.013778805732727 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.20145830512046814, "epoch": 9.87, "learning_rate": 7.185122569737954e-07, "loss": 0.6169, "step": 11677, "task_loss": 0.10084031522274017 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6145246624946594, "epoch": 9.87, "learning_rate": 7.13816098431483e-07, "loss": 0.473, "step": 11678, "task_loss": 0.5278776288032532 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.46959272027015686, "epoch": 9.87, "learning_rate": 7.091199398891706e-07, "loss": 0.5969, "step": 11679, "task_loss": 0.8945805430412292 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.36883777379989624, "epoch": 9.87, "learning_rate": 7.044237813468583e-07, "loss": 0.5775, "step": 11680, "task_loss": 0.5703630447387695 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6621829867362976, "epoch": 9.87, "learning_rate": 6.997276228045459e-07, "loss": 0.5539, "step": 11681, "task_loss": 0.19010132551193237 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.28977853059768677, "epoch": 9.87, "learning_rate": 6.950314642622335e-07, "loss": 0.4927, "step": 11682, "task_loss": 0.059441763907670975 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5329312086105347, "epoch": 9.88, "learning_rate": 6.903353057199211e-07, "loss": 0.5686, "step": 11683, "task_loss": 0.6747649908065796 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6110724210739136, "epoch": 9.88, "learning_rate": 6.856391471776087e-07, "loss": 0.4652, "step": 11684, "task_loss": 0.7761861085891724 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5337547063827515, "epoch": 9.88, "learning_rate": 6.809429886352963e-07, "loss": 0.6755, "step": 11685, "task_loss": 1.0520586967468262 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.69350266456604, "epoch": 9.88, "learning_rate": 6.762468300929839e-07, "loss": 0.485, "step": 11686, "task_loss": 0.4552392065525055 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.45390385389328003, "epoch": 9.88, "learning_rate": 6.715506715506716e-07, "loss": 0.5791, "step": 11687, "task_loss": 0.22169362008571625 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5429032444953918, "epoch": 9.88, "learning_rate": 6.668545130083592e-07, "loss": 0.4533, "step": 11688, "task_loss": 1.0706154108047485 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5955007672309875, "epoch": 9.88, "learning_rate": 6.621583544660469e-07, "loss": 0.5626, "step": 11689, "task_loss": 0.730311393737793 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.1659528911113739, "epoch": 9.88, "learning_rate": 6.574621959237345e-07, "loss": 0.4902, "step": 11690, "task_loss": 0.09856458008289337 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6393058896064758, "epoch": 9.88, "learning_rate": 6.527660373814221e-07, "loss": 0.4387, "step": 11691, "task_loss": 1.0841938257217407 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.38988596200942993, "epoch": 9.88, "learning_rate": 6.480698788391097e-07, "loss": 0.6805, "step": 11692, "task_loss": 1.0809043645858765 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4136083424091339, "epoch": 9.88, "learning_rate": 6.433737202967973e-07, "loss": 0.5176, "step": 11693, "task_loss": 0.4464772939682007 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.3602759540081024, "epoch": 9.88, "learning_rate": 6.386775617544849e-07, "loss": 0.4603, "step": 11694, "task_loss": 0.8915525674819946 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4933736026287079, "epoch": 9.89, "learning_rate": 6.339814032121725e-07, "loss": 0.5011, "step": 11695, "task_loss": 0.20792953670024872 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7183307409286499, "epoch": 9.89, "learning_rate": 6.292852446698601e-07, "loss": 0.537, "step": 11696, "task_loss": 0.6770106554031372 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6647278666496277, "epoch": 9.89, "learning_rate": 6.245890861275477e-07, "loss": 0.4902, "step": 11697, "task_loss": 0.4130721092224121 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.567448616027832, "epoch": 9.89, "learning_rate": 6.198929275852353e-07, "loss": 0.6651, "step": 11698, "task_loss": 0.9031826853752136 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5885971784591675, "epoch": 9.89, "learning_rate": 6.15196769042923e-07, "loss": 0.4846, "step": 11699, "task_loss": 0.6086427569389343 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4790969491004944, "epoch": 9.89, "learning_rate": 6.105006105006106e-07, "loss": 0.4786, "step": 11700, "task_loss": 0.6837748289108276 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.2886730432510376, "epoch": 9.89, "learning_rate": 6.058044519582982e-07, "loss": 0.3612, "step": 11701, "task_loss": 1.1823729276657104 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6294911503791809, "epoch": 9.89, "learning_rate": 6.011082934159858e-07, "loss": 0.4841, "step": 11702, "task_loss": 0.4175061285495758 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5599687099456787, "epoch": 9.89, "learning_rate": 5.964121348736734e-07, "loss": 0.5953, "step": 11703, "task_loss": 0.9556514024734497 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.8798742890357971, "epoch": 9.89, "learning_rate": 5.91715976331361e-07, "loss": 0.6146, "step": 11704, "task_loss": 1.3099777698516846 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.8467668294906616, "epoch": 9.89, "learning_rate": 5.870198177890486e-07, "loss": 0.8594, "step": 11705, "task_loss": 0.611789345741272 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.40101730823516846, "epoch": 9.89, "learning_rate": 5.823236592467362e-07, "loss": 0.5325, "step": 11706, "task_loss": 0.07936819642782211 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6615309119224548, "epoch": 9.9, "learning_rate": 5.776275007044238e-07, "loss": 0.5374, "step": 11707, "task_loss": 0.6743787527084351 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5921816825866699, "epoch": 9.9, "learning_rate": 5.729313421621114e-07, "loss": 0.7002, "step": 11708, "task_loss": 1.563610553741455 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.38395875692367554, "epoch": 9.9, "learning_rate": 5.68235183619799e-07, "loss": 0.511, "step": 11709, "task_loss": 0.3957885801792145 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5545816421508789, "epoch": 9.9, "learning_rate": 5.635390250774866e-07, "loss": 0.7129, "step": 11710, "task_loss": 0.9234458208084106 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.47973838448524475, "epoch": 9.9, "learning_rate": 5.588428665351742e-07, "loss": 0.5939, "step": 11711, "task_loss": 0.9120413064956665 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.41496413946151733, "epoch": 9.9, "learning_rate": 5.541467079928618e-07, "loss": 0.4431, "step": 11712, "task_loss": 0.6001289486885071 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7490970492362976, "epoch": 9.9, "learning_rate": 5.494505494505495e-07, "loss": 0.7053, "step": 11713, "task_loss": 0.42996713519096375 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4524564743041992, "epoch": 9.9, "learning_rate": 5.447543909082371e-07, "loss": 0.4594, "step": 11714, "task_loss": 0.8817393779754639 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.515659749507904, "epoch": 9.9, "learning_rate": 5.400582323659247e-07, "loss": 0.5946, "step": 11715, "task_loss": 0.8906511664390564 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4314442276954651, "epoch": 9.9, "learning_rate": 5.353620738236123e-07, "loss": 0.4708, "step": 11716, "task_loss": 0.9123784899711609 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.46263352036476135, "epoch": 9.9, "learning_rate": 5.306659152812999e-07, "loss": 0.7295, "step": 11717, "task_loss": 0.8897507786750793 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6467429399490356, "epoch": 9.9, "learning_rate": 5.259697567389875e-07, "loss": 0.4722, "step": 11718, "task_loss": 1.1356202363967896 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.658686637878418, "epoch": 9.91, "learning_rate": 5.212735981966752e-07, "loss": 0.6096, "step": 11719, "task_loss": 0.6746451258659363 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6877976655960083, "epoch": 9.91, "learning_rate": 5.165774396543628e-07, "loss": 0.5476, "step": 11720, "task_loss": 1.2474610805511475 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.36353200674057007, "epoch": 9.91, "learning_rate": 5.118812811120504e-07, "loss": 0.4975, "step": 11721, "task_loss": 0.9711368680000305 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.45902496576309204, "epoch": 9.91, "learning_rate": 5.07185122569738e-07, "loss": 0.5595, "step": 11722, "task_loss": 1.4435758590698242 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.3983616232872009, "epoch": 9.91, "learning_rate": 5.024889640274256e-07, "loss": 0.5831, "step": 11723, "task_loss": 0.23008567094802856 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4096594452857971, "epoch": 9.91, "learning_rate": 4.977928054851132e-07, "loss": 0.602, "step": 11724, "task_loss": 0.9441166520118713 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6864545345306396, "epoch": 9.91, "learning_rate": 4.930966469428008e-07, "loss": 0.5048, "step": 11725, "task_loss": 0.36237865686416626 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.3644891083240509, "epoch": 9.91, "learning_rate": 4.884004884004884e-07, "loss": 0.4523, "step": 11726, "task_loss": 0.37528926134109497 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7014000415802002, "epoch": 9.91, "learning_rate": 4.83704329858176e-07, "loss": 0.5005, "step": 11727, "task_loss": 0.7368527054786682 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.39181607961654663, "epoch": 9.91, "learning_rate": 4.790081713158637e-07, "loss": 0.4966, "step": 11728, "task_loss": 1.5060133934020996 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6878873109817505, "epoch": 9.91, "learning_rate": 4.7431201277355126e-07, "loss": 0.6024, "step": 11729, "task_loss": 0.7393627762794495 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4629337191581726, "epoch": 9.91, "learning_rate": 4.6961585423123887e-07, "loss": 0.5482, "step": 11730, "task_loss": 1.063454270362854 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5009876489639282, "epoch": 9.92, "learning_rate": 4.649196956889265e-07, "loss": 0.7925, "step": 11731, "task_loss": 0.6962403655052185 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.8067636489868164, "epoch": 9.92, "learning_rate": 4.602235371466141e-07, "loss": 0.528, "step": 11732, "task_loss": 1.8258051872253418 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7342110276222229, "epoch": 9.92, "learning_rate": 4.555273786043017e-07, "loss": 0.6111, "step": 11733, "task_loss": 0.9588983654975891 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4452546238899231, "epoch": 9.92, "learning_rate": 4.508312200619893e-07, "loss": 0.5633, "step": 11734, "task_loss": 0.6177554726600647 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6966890096664429, "epoch": 9.92, "learning_rate": 4.4613506151967687e-07, "loss": 0.5864, "step": 11735, "task_loss": 0.523197591304779 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.3445895314216614, "epoch": 9.92, "learning_rate": 4.414389029773645e-07, "loss": 0.5029, "step": 11736, "task_loss": 0.14446979761123657 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.648215651512146, "epoch": 9.92, "learning_rate": 4.367427444350521e-07, "loss": 0.6209, "step": 11737, "task_loss": 0.6089484095573425 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.694189190864563, "epoch": 9.92, "learning_rate": 4.320465858927397e-07, "loss": 0.6191, "step": 11738, "task_loss": 0.828980565071106 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.40063929557800293, "epoch": 9.92, "learning_rate": 4.273504273504274e-07, "loss": 0.5578, "step": 11739, "task_loss": 0.7841668725013733 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.34507930278778076, "epoch": 9.92, "learning_rate": 4.22654268808115e-07, "loss": 0.6069, "step": 11740, "task_loss": 0.515508770942688 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5274267196655273, "epoch": 9.92, "learning_rate": 4.1795811026580264e-07, "loss": 0.4529, "step": 11741, "task_loss": 0.4202560484409332 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.567223846912384, "epoch": 9.93, "learning_rate": 4.1326195172349025e-07, "loss": 0.5515, "step": 11742, "task_loss": 0.7392473220825195 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.38944780826568604, "epoch": 9.93, "learning_rate": 4.0856579318117786e-07, "loss": 0.5518, "step": 11743, "task_loss": 0.721247673034668 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7464834451675415, "epoch": 9.93, "learning_rate": 4.0386963463886547e-07, "loss": 0.6989, "step": 11744, "task_loss": 0.30686262249946594 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7815307378768921, "epoch": 9.93, "learning_rate": 3.991734760965531e-07, "loss": 0.659, "step": 11745, "task_loss": 0.9529940485954285 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4590678811073303, "epoch": 9.93, "learning_rate": 3.944773175542407e-07, "loss": 0.5874, "step": 11746, "task_loss": 1.4956971406936646 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.9558858275413513, "epoch": 9.93, "learning_rate": 3.897811590119283e-07, "loss": 0.6184, "step": 11747, "task_loss": 0.7298904061317444 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.37069857120513916, "epoch": 9.93, "learning_rate": 3.850850004696159e-07, "loss": 0.6279, "step": 11748, "task_loss": 1.27947199344635 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.569645881652832, "epoch": 9.93, "learning_rate": 3.803888419273035e-07, "loss": 0.4203, "step": 11749, "task_loss": 1.2289179563522339 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5442161560058594, "epoch": 9.93, "learning_rate": 3.756926833849911e-07, "loss": 0.5799, "step": 11750, "task_loss": 1.014480710029602 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.3223392367362976, "epoch": 9.93, "learning_rate": 3.709965248426787e-07, "loss": 0.461, "step": 11751, "task_loss": 1.4547274112701416 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.31352534890174866, "epoch": 9.93, "learning_rate": 3.663003663003663e-07, "loss": 0.618, "step": 11752, "task_loss": 0.13427120447158813 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7314385175704956, "epoch": 9.93, "learning_rate": 3.616042077580539e-07, "loss": 0.528, "step": 11753, "task_loss": 1.0424449443817139 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.3079354763031006, "epoch": 9.94, "learning_rate": 3.569080492157415e-07, "loss": 0.4352, "step": 11754, "task_loss": 0.17069625854492188 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.30831047892570496, "epoch": 9.94, "learning_rate": 3.5221189067342913e-07, "loss": 0.4185, "step": 11755, "task_loss": 0.17191042006015778 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5566548109054565, "epoch": 9.94, "learning_rate": 3.4751573213111674e-07, "loss": 0.5972, "step": 11756, "task_loss": 0.35748881101608276 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.2582440972328186, "epoch": 9.94, "learning_rate": 3.4281957358880435e-07, "loss": 0.3549, "step": 11757, "task_loss": 0.09129612892866135 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6168367862701416, "epoch": 9.94, "learning_rate": 3.3812341504649196e-07, "loss": 0.6388, "step": 11758, "task_loss": 0.5061416029930115 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5181525945663452, "epoch": 9.94, "learning_rate": 3.334272565041796e-07, "loss": 0.4745, "step": 11759, "task_loss": 0.48060616850852966 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.41481879353523254, "epoch": 9.94, "learning_rate": 3.2873109796186723e-07, "loss": 0.4473, "step": 11760, "task_loss": 0.23127619922161102 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4852718710899353, "epoch": 9.94, "learning_rate": 3.2403493941955484e-07, "loss": 0.5705, "step": 11761, "task_loss": 0.2514514923095703 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6261858940124512, "epoch": 9.94, "learning_rate": 3.1933878087724245e-07, "loss": 0.5049, "step": 11762, "task_loss": 0.3908676505088806 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5693542957305908, "epoch": 9.94, "learning_rate": 3.1464262233493006e-07, "loss": 0.531, "step": 11763, "task_loss": 0.5728221535682678 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4909439980983734, "epoch": 9.94, "learning_rate": 3.0994646379261767e-07, "loss": 0.6085, "step": 11764, "task_loss": 0.5773086547851562 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5992412567138672, "epoch": 9.94, "learning_rate": 3.052503052503053e-07, "loss": 0.5379, "step": 11765, "task_loss": 0.4260658025741577 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.36398622393608093, "epoch": 9.95, "learning_rate": 3.005541467079929e-07, "loss": 0.3985, "step": 11766, "task_loss": 0.23804312944412231 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.44097012281417847, "epoch": 9.95, "learning_rate": 2.958579881656805e-07, "loss": 0.3858, "step": 11767, "task_loss": 0.17028015851974487 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.339007705450058, "epoch": 9.95, "learning_rate": 2.911618296233681e-07, "loss": 0.4751, "step": 11768, "task_loss": 0.1849353164434433 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.32081252336502075, "epoch": 9.95, "learning_rate": 2.864656710810557e-07, "loss": 0.4706, "step": 11769, "task_loss": 0.2316647171974182 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5445622205734253, "epoch": 9.95, "learning_rate": 2.817695125387433e-07, "loss": 0.5206, "step": 11770, "task_loss": 0.3381626307964325 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.8521730899810791, "epoch": 9.95, "learning_rate": 2.770733539964309e-07, "loss": 0.5178, "step": 11771, "task_loss": 1.0700209140777588 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4060363173484802, "epoch": 9.95, "learning_rate": 2.7237719545411855e-07, "loss": 0.466, "step": 11772, "task_loss": 1.5562776327133179 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.48679983615875244, "epoch": 9.95, "learning_rate": 2.6768103691180616e-07, "loss": 0.552, "step": 11773, "task_loss": 0.06236816942691803 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7523693442344666, "epoch": 9.95, "learning_rate": 2.629848783694938e-07, "loss": 0.5166, "step": 11774, "task_loss": 1.7538937330245972 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.3473876118659973, "epoch": 9.95, "learning_rate": 2.582887198271814e-07, "loss": 0.5965, "step": 11775, "task_loss": 1.2157716751098633 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.41753390431404114, "epoch": 9.95, "learning_rate": 2.53592561284869e-07, "loss": 0.4986, "step": 11776, "task_loss": 0.9441444277763367 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.45573607087135315, "epoch": 9.95, "learning_rate": 2.488964027425566e-07, "loss": 0.5765, "step": 11777, "task_loss": 0.7261573672294617 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6504294872283936, "epoch": 9.96, "learning_rate": 2.442002442002442e-07, "loss": 0.6486, "step": 11778, "task_loss": 1.0529546737670898 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.2786175310611725, "epoch": 9.96, "learning_rate": 2.395040856579318e-07, "loss": 0.5115, "step": 11779, "task_loss": 0.6888869404792786 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.48786431550979614, "epoch": 9.96, "learning_rate": 2.3480792711561944e-07, "loss": 0.5833, "step": 11780, "task_loss": 0.7851517200469971 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.47927629947662354, "epoch": 9.96, "learning_rate": 2.3011176857330705e-07, "loss": 0.4253, "step": 11781, "task_loss": 0.36839035153388977 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.20583805441856384, "epoch": 9.96, "learning_rate": 2.2541561003099466e-07, "loss": 0.4041, "step": 11782, "task_loss": 0.3866371512413025 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5634337663650513, "epoch": 9.96, "learning_rate": 2.2071945148868224e-07, "loss": 0.5032, "step": 11783, "task_loss": 0.6320585608482361 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.35720694065093994, "epoch": 9.96, "learning_rate": 2.1602329294636985e-07, "loss": 0.5118, "step": 11784, "task_loss": 0.41417720913887024 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5381942987442017, "epoch": 9.96, "learning_rate": 2.113271344040575e-07, "loss": 0.721, "step": 11785, "task_loss": 0.6323438882827759 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4419476389884949, "epoch": 9.96, "learning_rate": 2.0663097586174512e-07, "loss": 0.4427, "step": 11786, "task_loss": 0.07649518549442291 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6069304943084717, "epoch": 9.96, "learning_rate": 2.0193481731943273e-07, "loss": 0.4929, "step": 11787, "task_loss": 0.2779400050640106 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.2164638489484787, "epoch": 9.96, "learning_rate": 1.9723865877712034e-07, "loss": 0.4137, "step": 11788, "task_loss": 0.6758087873458862 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6397323608398438, "epoch": 9.96, "learning_rate": 1.9254250023480795e-07, "loss": 0.4469, "step": 11789, "task_loss": 0.3655050992965698 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.3389008045196533, "epoch": 9.97, "learning_rate": 1.8784634169249554e-07, "loss": 0.4653, "step": 11790, "task_loss": 0.17872558534145355 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.42615583539009094, "epoch": 9.97, "learning_rate": 1.8315018315018315e-07, "loss": 0.378, "step": 11791, "task_loss": 0.5046189427375793 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4162856340408325, "epoch": 9.97, "learning_rate": 1.7845402460787076e-07, "loss": 0.4501, "step": 11792, "task_loss": 0.10605460405349731 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5327898263931274, "epoch": 9.97, "learning_rate": 1.7375786606555837e-07, "loss": 0.4982, "step": 11793, "task_loss": 0.27749985456466675 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.48357489705085754, "epoch": 9.97, "learning_rate": 1.6906170752324598e-07, "loss": 0.5904, "step": 11794, "task_loss": 0.49925893545150757 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.534135103225708, "epoch": 9.97, "learning_rate": 1.6436554898093362e-07, "loss": 0.5732, "step": 11795, "task_loss": 1.051812767982483 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.3636243939399719, "epoch": 9.97, "learning_rate": 1.5966939043862123e-07, "loss": 0.5997, "step": 11796, "task_loss": 0.8055181503295898 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.27140694856643677, "epoch": 9.97, "learning_rate": 1.5497323189630884e-07, "loss": 0.3725, "step": 11797, "task_loss": 0.6648226976394653 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7842380404472351, "epoch": 9.97, "learning_rate": 1.5027707335399645e-07, "loss": 0.6314, "step": 11798, "task_loss": 0.7309542894363403 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.478877454996109, "epoch": 9.97, "learning_rate": 1.4558091481168406e-07, "loss": 0.5019, "step": 11799, "task_loss": 1.2602530717849731 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.42598360776901245, "epoch": 9.97, "learning_rate": 1.4088475626937164e-07, "loss": 0.4346, "step": 11800, "task_loss": 0.2025788426399231 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.44479840993881226, "epoch": 9.97, "learning_rate": 1.3618859772705928e-07, "loss": 0.401, "step": 11801, "task_loss": 0.42690378427505493 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.3598289489746094, "epoch": 9.98, "learning_rate": 1.314924391847469e-07, "loss": 0.3704, "step": 11802, "task_loss": 0.6306514739990234 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4371466040611267, "epoch": 9.98, "learning_rate": 1.267962806424345e-07, "loss": 0.4133, "step": 11803, "task_loss": 0.977277398109436 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5131658315658569, "epoch": 9.98, "learning_rate": 1.221001221001221e-07, "loss": 0.4065, "step": 11804, "task_loss": 0.47720029950141907 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4955328702926636, "epoch": 9.98, "learning_rate": 1.1740396355780972e-07, "loss": 0.4955, "step": 11805, "task_loss": 0.4314495325088501 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.448905348777771, "epoch": 9.98, "learning_rate": 1.1270780501549733e-07, "loss": 0.4611, "step": 11806, "task_loss": 0.334781676530838 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.3976094722747803, "epoch": 9.98, "learning_rate": 1.0801164647318492e-07, "loss": 0.4589, "step": 11807, "task_loss": 0.614673376083374 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7028859853744507, "epoch": 9.98, "learning_rate": 1.0331548793087256e-07, "loss": 0.5392, "step": 11808, "task_loss": 0.4435502588748932 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4882431626319885, "epoch": 9.98, "learning_rate": 9.861932938856017e-08, "loss": 0.4171, "step": 11809, "task_loss": 0.5074236989021301 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.8158570528030396, "epoch": 9.98, "learning_rate": 9.392317084624777e-08, "loss": 0.6792, "step": 11810, "task_loss": 1.2517924308776855 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.8543037176132202, "epoch": 9.98, "learning_rate": 8.922701230393538e-08, "loss": 0.6036, "step": 11811, "task_loss": 1.1480350494384766 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.50777667760849, "epoch": 9.98, "learning_rate": 8.453085376162299e-08, "loss": 0.5674, "step": 11812, "task_loss": 0.4583331048488617 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.274361252784729, "epoch": 9.99, "learning_rate": 7.983469521931061e-08, "loss": 0.4557, "step": 11813, "task_loss": 0.398992121219635 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4127875864505768, "epoch": 9.99, "learning_rate": 7.513853667699822e-08, "loss": 0.5221, "step": 11814, "task_loss": 0.43059873580932617 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4107823669910431, "epoch": 9.99, "learning_rate": 7.044237813468582e-08, "loss": 0.5331, "step": 11815, "task_loss": 0.3799629807472229 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.810592532157898, "epoch": 9.99, "learning_rate": 6.574621959237344e-08, "loss": 0.6049, "step": 11816, "task_loss": 1.771484375 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.48412024974823, "epoch": 9.99, "learning_rate": 6.105006105006105e-08, "loss": 0.4849, "step": 11817, "task_loss": 1.0037184953689575 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.3843819797039032, "epoch": 9.99, "learning_rate": 5.6353902507748664e-08, "loss": 0.4758, "step": 11818, "task_loss": 0.6112626791000366 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.3779985010623932, "epoch": 9.99, "learning_rate": 5.165774396543628e-08, "loss": 0.3602, "step": 11819, "task_loss": 0.03185408189892769 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6673596501350403, "epoch": 9.99, "learning_rate": 4.6961585423123884e-08, "loss": 0.5006, "step": 11820, "task_loss": 1.4893971681594849 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.485041081905365, "epoch": 9.99, "learning_rate": 4.2265426880811495e-08, "loss": 0.4828, "step": 11821, "task_loss": 0.6400530934333801 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.4484560191631317, "epoch": 9.99, "learning_rate": 3.756926833849911e-08, "loss": 0.4141, "step": 11822, "task_loss": 0.8475421667098999 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.1720668524503708, "epoch": 9.99, "learning_rate": 3.287310979618672e-08, "loss": 0.4459, "step": 11823, "task_loss": 0.05403786897659302 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.5080179572105408, "epoch": 9.99, "learning_rate": 2.8176951253874332e-08, "loss": 0.5987, "step": 11824, "task_loss": 0.8450747132301331 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.2977520227432251, "epoch": 10.0, "learning_rate": 2.3480792711561942e-08, "loss": 0.4515, "step": 11825, "task_loss": 0.07201095670461655 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.7180753946304321, "epoch": 10.0, "learning_rate": 1.8784634169249556e-08, "loss": 0.6341, "step": 11826, "task_loss": 0.730100691318512 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.34227943420410156, "epoch": 10.0, "learning_rate": 1.4088475626937166e-08, "loss": 0.5517, "step": 11827, "task_loss": 1.168721318244934 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.8317971229553223, "epoch": 10.0, "learning_rate": 9.392317084624778e-09, "loss": 0.5132, "step": 11828, "task_loss": 0.9987679123878479 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.6715251207351685, "epoch": 10.0, "learning_rate": 4.696158542312389e-09, "loss": 0.5417, "step": 11829, "task_loss": 0.7848132252693176 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.40466940388319583, "compression/movement_sparsity/model_sparsity": 0.39076776495110943, "compression_loss": 0.0, "distillation_loss": 0.2879047989845276, "epoch": 10.0, "learning_rate": 0.0, "loss": 0.4832, "step": 11830, "task_loss": 1.1391990184783936 }, { "epoch": 10.0, "step": 11830, "total_flos": 5.9664632082415714e+19, "train_loss": 25.706991044825553, "train_runtime": 44544.7439, "train_samples_per_second": 17.005, "train_steps_per_second": 0.266 } ], "max_steps": 11830, "num_train_epochs": 10, "total_flos": 5.9664632082415714e+19, "trial_name": null, "trial_params": null }